/****************************************************************************
* REPLICATION CODE FOR "SCALABLE DEMAND AND MARKUPS"
* 
* This program processes Nielsen scanner data to estimate demand parameters,
* calculate markups, and perform counterfactual analyses across multiple
* product modules over time.
*
* STRUCTURE:
* 1. Setup and Configuration
* 2. Process Raw Nielsen Data
*    2.1. Process Store Files
*    2.2. Process Household Panel Data
*    2.3. Process RMS Version Files & Products
*    2.4. Process Movement Data
* 3. Generate Product Nests via Clustering
* 4. Estimate Demand
*    4.1. Winsorize Prices in the 1% Tails
*    4.2. Normalize Prices & Quantities, Generate Shares and Instruments, Estimate Demand
*    4.3. Generate Tables Summarizing Sample and Demand Estimation Results
* 5. Compute Markups and Generate Tables/Figures
* 6. Compute Counterfactuals
*    6.1. Combine Counterfactual Results with Original Data
*    6.2. Generate Figures from Counterfactual Analysis
* 7. Compute Consumer Surplus
* 8. Additional Analyses
*    8.1. Alternative Clustering Approaches
*    8.2. Market Size Robustness Analysis
*    8.3. Alternative Nesting Structure Analysis
*    8.4. Household Panel Representation
*    8.5. Ownership Concentration Analysis
*    8.6. Shrinkflation Analysis
*    8.7. Product Turnover and Markup Determinants
****************************************************************************/


/***************************************************************************
* 1. SETUP AND CONFIGURATION
***************************************************************************/
* Define directories
global nielsen_scanner_dir "/project/Nielsen/scanner"
global nielsen_panel_dir "/project/Nielsen/panel"
global table_dir "/project/markups/Tables"
global data_dir "/project/markups/Data"


* Here's a summary of the directory structure:
 * /project/Nielsen/scanner - Base scanner data
 * /project/Nielsen/panel - Base panel data
 * /project/sullivan/markups/Data - Data storage
 * /project/sullivan/markups/Tables - Output tables
 * 
 * Multiple subdirectories are created for various data processing steps:
 * - stores: Store reference data
 * - movement: Product movement data
 * - panel: Household panel data
 * - Demand_Results: Model estimation results
 * - supply: Supply-side analysis data
 

* Create base directories if they don't exist
mkdir "$data_dir"
mkdir "$table_dir"

* Create directories to store store data
mkdir "$data_dir/Nielsen"
mkdir "$data_dir/Nielsen/stores"

* Create directories for movement data
mkdir "$data_dir/Nielsen/movement"
mkdir "$data_dir/Nielsen/movement/products/"

* Create directories for panel data
mkdir "$data_dir/Nielsen/panel"
mkdir "$data_dir/Nielsen/panel/retailers"

* Create directories for demand results
mkdir "$data_dir/Demand_Results"


* Create directories for supplemental files
* Put the two supplemental dta files (cpi_quarterly.dta and brand_lookup.dta) and two python scripts (computing_costs.py, computing_counterfactuals.py) here
mkdir "$data_dir/supplemental"

* Create directories for supply data
mkdir "$data_dir/supply"

* Define module list - all product modules to analyze
local module_list "1030 1042 1044 1050 1105 1114 1120 1177 1257 1290 1293 1303 1309 1320 1323 1326 1327 1330 1331 1334 1340 1341 1344 1360 1362 1425 1441 1452 1455 1461 1463 1484 1487 1493 1498 1508 1532 1553 2621 2623 2624 2629 2631 2634 2672 2675 3550 3561 3572 3574 3576 3577 3578 3583 3590 3592 3603 3605 3618 3625 4000 4001 4002 4003 4004 4005 4006 5000 5010 5050 7012 7260 7340 7460 7734"

* Define year ranges for scanner and panel data
local year_min = 2006
local year_max = 2018
local year_min_panel = 2004
local year_max_panel = 2018

* Install required packages
ssc install _gwtmean
net install grc1leg, from( http://www.stata.com/users/vwiggins/)
ssc install ranktest
net install ftools, from("https://raw.githubusercontent.com/sergiocorreia/ftools/master/src/")
net install reghdfe, from("https://raw.githubusercontent.com/sergiocorreia/reghdfe/master/src/")
ssc install ivreg2
net install ivreghdfe, from(https://raw.githubusercontent.com/sergiocorreia/ivreghdfe/master/src/)

* Display confirmation message
display "All directory paths have been created."


/***************************************************************************
* 2. PROCESS RAW NIELSEN DATA
***************************************************************************/

/***************************************************************************
* 2.1. PROCESS STORE FILES
* - Gets stores appearing in at least one product module in all years 
* - Filters to stores that appear in at least 10 product modules
***************************************************************************/

* Import Nielsen store files and create consolidated file across all years
foreach yr of numlist `year_min'/`year_max' {
    * Copy and decompress store file for current year
    !cp -vi ${nielsen_scanner_dir}/`yr'/reference/stores_`yr'.tsv.gz ${data_dir}/Nielsen/stores/stores_`yr'.tsv.gz
    !gzip -dv ${data_dir}/Nielsen/stores/stores_`yr'.tsv.gz

    * Import and save as Stata file
    import delimited ${data_dir}/Nielsen/stores/stores_`yr'.tsv, clear
    save ${data_dir}/Nielsen/stores/stores_`yr'.dta, replace
    erase ${data_dir}/Nielsen/stores/stores_`yr'.tsv
}

* Append all years and keep stores appearing in all years
clear
foreach yr of numlist `year_min'/`year_max' {
    append using ${data_dir}/Nielsen/stores/stores_`yr'.dta
}
egen n_years = count(year), by(store_code)
keep if n_years == `year_max'-`year_min'+1

* Fix missing location data for store 7075127
replace fips_state_c = 51 if store_code == 7075127 & fips_state_code == .
replace fips_state_d = "VA" if store_code == 7075127 & fips_state_d == ""
replace fips_county_c = 19 if store_code == 7075127 & fips_county_code == .
replace fips_county_d = "BEDFORD" if store_code == 7075127 & fips_county_d == ""
replace dma_c = 573 if store_code == 7075127 & dma_code == .
replace dma_d = "ROANOKE-LYNCHBURG VA" if store_code == 7075127 & dma_d == ""

sort store_code year
save ${data_dir}/Nielsen/stores/stores_allyrs.dta, replace

* Identify stores that appear in our product modules across all years
* and appear in at least 10 modules per year
foreach pm of numlist `module_list' {
    foreach yr of numlist `year_min'/`year_max' {
        di "Processing module `pm', year `yr'"
        
        * Extract movement data for current module/year
        !gunzip -c ${nielsen_scanner_dir}/`yr'/movement/`pm'_`yr'.tsv.gz > ${data_dir}/Nielsen/movement/movement_`pm'_`yr'.tsv
        
        * Import movement data and keep unique store codes
        import delimited ${data_dir}/Nielsen/movement/movement_`pm'_`yr'.tsv, clear
        keep store_code 
        duplicates drop
        gen year = `yr'
        gen product_module_code = `pm'
        
        save ${data_dir}/Nielsen/movement/stores_in_`pm'_`yr'.dta, replace
        !rm ${data_dir}/Nielsen/movement/movement_`pm'_`yr'.tsv
    }
}

* Combine store information across all modules and years
clear
foreach pm of numlist `module_list' {
    foreach yr of numlist `year_min'/`year_max' {
        append using ${data_dir}/Nielsen/movement/stores_in_`pm'_`yr'.dta
    }
}

* Count how many product modules each store appears in per year
egen pmc_year = count(product_module_code), by(store_c year)
duplicates drop store_c year, force

* Count how many years each store appears
egen n_years = count(year), by(store_c)

* Find the minimum number of product modules per year for each store
egen min_pmc_year = min(pmc_year), by(store_c)

* Keep only one observation per store
duplicates drop store_code, force

* Keep stores that appear in all 13 years and in 10+ product modules each year
gen keep = n_years == 13 & min_pmc_year >= 10
keep if keep == 1
keep store_code
save ${data_dir}/Nielsen/movement/stores_subset, replace

* Clean up temporary files
foreach pm of numlist `module_list' {
    foreach yr of numlist `year_min'/`year_max' {
        !rm ${data_dir}/Nielsen/movement/stores_in_`pm'_`yr'.dta
    }
}


/***************************************************************************
* 2.2. PROCESS THE RAW HOUSEHOLD PANEL DATA
* - Processes household panel data to prepare for clustering
* - Identifies single-person households for sensitivity analysis
***************************************************************************/

* Import retailer file from panel data (gives channel type)
import delimited ${nielsen_panel_dir}/Master_Files/Latest/retailers.tsv, clear
save ${data_dir}/Nielsen/panel/retailers, replace

* Import trips file from panel data (links household code to each trip code)
foreach yr of numlist `year_min_panel'/`year_max_panel' {
    !gunzip -c $nielsen_panel_dir/`yr'/Annual_Files/trips_`yr'.tsv.gz > ${data_dir}/Nielsen/panel/trips_`yr'.tsv
    import delimited ${data_dir}/Nielsen/panel/trips_`yr'.tsv, clear
    save ${data_dir}/Nielsen/panel/trips_`yr', replace
    !rm ${data_dir}/Nielsen/panel/trips_`yr'.tsv
}

* Import purchases file from panel data
foreach yr of numlist `year_min_panel'/`year_max_panel' {
    !gunzip -c ${nielsen_panel_dir}/`yr'/Annual_Files/purchases_`yr'.tsv.gz > ${data_dir}/Nielsen/panel/purchases_`yr'.tsv
    import delimited ${data_dir}/Nielsen/panel/purchases_`yr'.tsv, clear
    save ${data_dir}/Nielsen/panel/purchases_`yr', replace
    !rm ${data_dir}/Nielsen/panel/purchases_`yr'.tsv
}

* Link purchases to trips and retailer information, flag grocery stores
foreach yr of numlist `year_min_panel'/`year_max_panel' {
    * Merge purchases with trips
    use ${data_dir}/Nielsen/panel/purchases_`yr', clear
    merge m:1 trip_code_uc using ${data_dir}/Nielsen/panel/trips_`yr'
    drop if _merge != 3
    drop _merge
    
    * Merge with retailers to get channel information
    merge m:1 retailer_code using ${data_dir}/Nielsen/panel/retailers
    drop if _merge != 3
    drop _merge
    
    * Create grocery store indicator
    gen grocery = channel == "Grocery"
    
    save ${data_dir}/Nielsen/panel/hh_purchase_`yr', replace
    
    * Clean up temporary files
    !rm ${data_dir}/Nielsen/panel/trips_`yr'.dta
    !rm ${data_dir}/Nielsen/panel/purchases_`yr'.dta
}

* Get counts of UPCs in grocery and overall, by year
foreach yr of numlist `year_min_panel'/`year_max_panel' {
    use ${data_dir}/Nielsen/panel/hh_purchase_`yr', clear
    
    * Create grocery flag by UPC
    egen max_grocery = max(grocery), by(upc upc_ver)
    
    * Count occurrences of each UPC overall
    egen count_upc = count(quantity), by(upc upc_ver)
    
    * Count occurrences of each UPC in grocery stores
    egen tcount_upc_grocery = count(quantity) if grocery == 1, by(upc upc_ver)
    egen count_upc_grocery = mean(tcount_upc_grocery), by(upc upc_ver)
    
    * Keep UPC-level data
    keep upc upc_ver max_grocery count_upc count_upc_grocery
    duplicates drop
    
    save ${data_dir}/Nielsen/panel/hh_purchase_upcs_`yr', replace
}

* Aggregate UPC counts for first half of years
local end = `year_min_panel' + ceil((`year_max_panel'-`year_min_panel')/2)
clear
foreach yr of numlist `year_min_panel'/`end' {
    append using ${data_dir}/Nielsen/panel/hh_purchase_upcs_`yr'
}
drop max_grocery
replace count_upc_grocery = 0 if count_upc_grocery == . 

* Sum counts across years by UPC
egen tcount_upc_grocery = sum(count_upc_grocery), by(upc upc_ver)
egen tcount_upc = sum(count_upc), by(upc upc_ver)

* Rename and clean
drop count_upc count_upc_grocery
rename tcount_upc count_upc 
rename tcount_upc_grocery count_upc_grocery
duplicates drop 
save ${data_dir}/Nielsen/panel/hh_purchase_upcs_1sthalf, replace

* Aggregate UPC counts for second half of years
clear
local start = `end' + 1
foreach yr of numlist `end'/`year_max_panel' {
    append using ${data_dir}/Nielsen/panel/hh_purchase_upcs_`yr'
}
drop max_grocery
replace count_upc_grocery = 0 if count_upc_grocery == . 
egen tcount_upc_grocery = sum(count_upc_grocery), by(upc upc_ver)
egen tcount_upc = sum(count_upc), by(upc upc_ver)
drop count_upc count_upc_grocery
rename tcount_upc count_upc 
rename tcount_upc_grocery count_upc_grocery
duplicates drop 
save ${data_dir}/Nielsen/panel/hh_purchase_upcs_2ndhalf, replace

* Combine first and second half data
clear 
use ${data_dir}/Nielsen/panel/hh_purchase_upcs_1sthalf
append using ${data_dir}/Nielsen/panel/hh_purchase_upcs_2ndhalf
egen tcount_upc_grocery = sum(count_upc_grocery), by(upc upc_ver)
egen tcount_upc = sum(count_upc), by(upc upc_ver)
drop count_upc count_upc_grocery
rename tcount_upc count_upc 
rename tcount_upc_grocery count_upc_grocery
duplicates drop 
save ${data_dir}/Nielsen/panel/hh_purchase_upcs_allyrs, replace

* Prepare household purchase data for clustering
clear
foreach yr of numlist `year_min_panel'/`year_max_panel' {
    use ${data_dir}/Nielsen/panel/hh_purchase_`yr', clear
    keep if grocery == 1
    keep upc upc_ver household_code
    duplicates drop 
    save ${data_dir}/Nielsen/panel/for_correlation_`yr', replace
    !rm ${data_dir}/Nielsen/panel/hh_purchase_`yr'.dta
}

* Combine household purchase data across all years
clear
foreach yr of numlist `year_min_panel'/`year_max_panel' {
    append using ${data_dir}/Nielsen/panel/for_correlation_`yr'
}
duplicates drop
save ${data_dir}/Nielsen/panel/for_correlation_allyr, replace

* Create list of single-person households
foreach yr of numlist `year_min_panel'/`year_max_panel' {
    !gunzip -c $nielsen_panel_dir/`yr'/Annual_Files/panelists_`yr'.tsv.gz > ${data_dir}/Nielsen/panel/panelists_`yr'.tsv
    import delimited ${data_dir}/Nielsen/panel/panelists_`yr'.tsv, clear
    keep if household_size == 1
    keep household_cd
    rename household_cd household_code
    sort household_code
    save ${data_dir}/Nielsen/panel/single_hh_`yr', replace
}

* Combine single-person households across all years
clear
foreach yr of numlist `year_min_panel'/`year_max_panel' {
    append using ${data_dir}/Nielsen/panel/single_hh_`yr'
}
duplicates drop
save ${data_dir}/Nielsen/panel/single_hh_allyr, replace

* Clean up temporary files
foreach yr of numlist `year_min_panel'/`year_max_panel' {
    !rm ${data_dir}/Nielsen/panel/single_hh_`yr'.dta
}
!rm ${data_dir}/Nielsen/panel/hh_purchase_upcs_2ndhalf.dta
!rm ${data_dir}/Nielsen/panel/hh_purchase_upcs_1sthalf.dta


/***************************************************************************
* 2.3. PROCESS RMS VERSION FILES & PRODUCTS FILE
* - Imports RMS version files (UPC versions change over time)
* - Processes product information
***************************************************************************/

* Import RMS version files
foreach yr of numlist `year_min'/`year_max' {
    !gunzip -c ${nielsen_scanner_dir}/`yr'/reference/rms_versions_`yr'.tsv.gz > ${data_dir}/Nielsen/movement/rms_versions_`yr'.tsv
    import delimited ${data_dir}/Nielsen/movement/rms_versions_`yr'.tsv, clear
    sort upc upc_ver
    save ${data_dir}/Nielsen/movement/rms_versions_`yr', replace
    !rm ${data_dir}/Nielsen/movement/rms_versions_`yr'.tsv
}

* Import and save products master file
import delimited ${nielsen_scanner_dir}/Master_Files/Latest/products.tsv, clear bindquotes(nobind)
save ${data_dir}/Nielsen/movement/products, replace

* Create module-specific product files
foreach pm of numlist `module_list' {
    preserve
    keep if product_module_code == `pm'
    save ${data_dir}/Nielsen/movement/products/products_`pm', replace
    restore
}
clear


/***************************************************************************
* 2.4. PROCESS MOVEMENT DATA INTO UPC-STORE-WEEK SAMPLE
* - Creates weekly UPC-store level sample with pricing and quantity
* - Filters to most purchased UPCs
***************************************************************************/

foreach pm of numlist `module_list' {
    di "Processing module `pm'"
    
    foreach yr of numlist `year_min'/`year_max' {
        di "  Year `yr'"
        
        * Get movement data for current module/year
        !gunzip -c ${nielsen_scanner_dir}/`yr'/movement/`pm'_`yr'.tsv.gz > ${data_dir}/Nielsen/movement/movement_`pm'_`yr'.tsv
        import delimited ${data_dir}/Nielsen/movement/movement_`pm'_`yr'.tsv, clear
        
        * Convert week_end to string for date handling
        tostring week_end, gen(date)
        
        * Extract month and quarter
        gen month = substr(date, 5, 2)
        destring month, replace
        gen quarter = floor((month-1)/3) + 1
        
        * Calculate total units and average weekly sales by quarter-store-UPC
        egen tot_units = sum(units), by(store_code_uc upc quarter)
        egen avg_weekly_sales_by_quarter = mean(units), by(store_code_uc upc quarter)
        
        * Merge with RMS versions to get UPC version
        merge m:1 upc using ${data_dir}/Nielsen/movement/rms_versions_`yr'
        drop if _merge != 3
        drop _merge
        
        * Merge with product information
        merge m:1 upc upc_ver using ${data_dir}/Nielsen/movement/products/products_`pm'
        drop if _merge != 3
        drop _merge
        
        * Create filtered sample
        preserve
        * Collapse to quarter level for sales thresholds
        duplicates drop store_code_uc upc quarter, force
        drop units
        rename tot_units units
        
        * Group UPCs
        egen prod = group(upc upc_ver)
        
        * Calculate sales volume metrics for filtering
        egen upc_quantity = sum(units), by(upc upc_ver)
        egen tot_quantity = sum(units)
        
        * Sort by product and prepare for cumulative calculation
        sort prod 
        gen ind = prod[_n] != prod[_n-1]
        gsort -ind -upc_quantity
        
        * Calculate cumulative sales fraction and find 85% threshold
        gen running_total = sum(upc_quantity) if ind == 1
        gen frac = running_total / tot_quantity if ind == 1
        gen tmp = frac >= .85 if ind == 1
        egen cutoff = min(frac) if tmp == 1
        gen keep_ind = frac <= cutoff if ind == 1
        egen keep_upc = mean(keep_ind), by(upc upc_ver)
        
        * Keep top UPCs accounting for 85% of sales volume
        keep if keep_upc == 1
        
        * Limit to UPCs that appear in household grocery purchases
        merge m:1 upc upc_ver using ${data_dir}/Nielsen/panel/hh_purchase_upcs_allyrs
        keep if count_upc_grocery > 0 & _merge == 3
        drop _merge
        
        * Limit to stores in our subset
        merge m:1 store_code using ${data_dir}/Nielsen/movement/stores_subset
        keep if _merge == 3
        drop _merge
        
        * Keep only UPCs with sufficient average weekly sales
        keep if avg_weekly_sales_by_quarter >= 10
        
        * Keep key variables for UPC identification
        keep upc upc_ver brand_d upc_descr size1_a multi
        gen year = `yr'
        duplicates drop
        
        save ${data_dir}/Nielsen/movement/upc_`pm'_`yr', replace
        restore
        
        * Merge with filtered UPC list
        merge m:1 upc upc_ver using ${data_dir}/Nielsen/movement/upc_`pm'_`yr', gen(_mergeUPC)
        drop if _mergeUPC != 3
        drop _mergeUPC
        
        * Merge with store subset
        merge m:1 store_code using ${data_dir}/Nielsen/movement/stores_subset, gen(_mergeStore)
        keep if _mergeStore == 3
        drop _mergeStore
        
        * Keep only store-UPCs with sufficient average weekly sales
        keep if avg_weekly_sales_by_quarter >= 10
        
        save ${data_dir}/Nielsen/movement/`pm'_`yr'_movement_weekly, replace
        !rm ${data_dir}/Nielsen/movement/movement_`pm'_`yr'.tsv
    }
}


/***************************************************************************
* 3. GENERATE NESTS FROM HOUSEHOLD DATA VIA CLUSTERING
* - Uses household purchase patterns to identify product nests (groups)
* - These nests are used in nested logit demand estimation
***************************************************************************/

/***************************************************************************
* 3.1 GET UPCS IN CLEANED MOVEMENT DATA
***************************************************************************/

foreach pm of numlist `module_list' {
    di "Processing module `pm' UPCs"
    clear
    foreach yr of numlist `year_min'/`year_max' {
        di "  Year `yr'"
        append using ${data_dir}/Nielsen/movement/upc_`pm'_`yr'
    }
    drop year
    duplicates drop
    gen product_module_code = `pm'
    save ${data_dir}/Nielsen/movement/upc_`pm', replace
}


/***************************************************************************
* 3.2 RUN CLUSTERING ALGORITHM ON HOUSEHOLD DATA
* - Uses household purchase patterns to identify product clusters
* - Implements Ward's linkage method with correlation distance
***************************************************************************/

foreach pm of numlist `module_list' {
    di "Clustering module `pm'"
    
    * Get household purchase data for current module
    use ${data_dir}/Nielsen/panel/for_correlation_allyr, clear
    merge m:1 upc upc_ver using ${data_dir}/Nielsen/movement/upc_`pm'
    keep if _merge == 3
    drop _merge
    
    * Create unique product identifier
    egen product = group(upc upc_ver)
    save ${data_dir}/Nielsen/panel/for_correlation_all_new_`pm', replace
    
    * Prepare data for clustering
    keep product household_code product_module_code
    gen purchase = 1
    reshape wide purchase, i(household_code product_m) j(product)
    
    * Replace missing values with zeros
    foreach var of varlist purchase* {
        quietly replace `var' = 0 if `var' == .
    }
    
    * Calculate correlation-based dissimilarity matrix
    matrix dissimilarity DD = purchase*, correlation dissim(oneminus) var 
    
    * Run Ward's linkage clustering
    clustermat wardslinkage DD, shape(full) clear name(cluster_results) label(product)
    sort cluster_results_id
    
    * Use Duda-Hart stopping rule to determine optimal number of clusters
    dudahart, dist(DD) ngroups(30) id(cluster_results_id)
    
    * Find optimal number of clusters based on Duda-Hart criterion
    local i = 1
    local j = `i' + 1
    while `r(dudat2_`i')' > `r(dudat2_`j')' {
        local i = `i' + 1
        local j = `j' + 1
    }
    gen n_clusters = `i'
    
    * Generate cluster assignments
    cluster gen cluster_ids = groups(`i')
    
    * Clean up and prepare for merging
    gen temp = substr(product, 9, .)
    destring temp, replace
    keep temp cluster_ids n_clusters
    rename temp product
    
    * Merge back with original data
    merge 1:m product using ${data_dir}/Nielsen/panel/for_correlation_all_new_`pm'
    
    * Keep only necessary variables
    keep product_module_c upc upc_ver cluster_ids n_clusters
    order product_module_c upc upc_ver cluster_ids n_clusters
    duplicates drop
    
    save ${data_dir}/Nielsen/panel/clustering_results_new_`pm', replace

    * Save number of clusters per module
    keep product_module_c n_clusters
    duplicates drop
    save ${data_dir}/Nielsen/panel/module_nclusters_new_`pm', replace
}


/***************************************************************************
* 4. ESTIMATE DEMAND
* - Implements nested logit demand estimation
* - Calculates elasticities and generates tables
***************************************************************************/

/***************************************************************************
* 4.1 WINSORIZE PRICES IN THE 1% TAILS
* - Trim extreme prices to reduce the impact of outliers
***************************************************************************/

foreach pm of numlist `module_list' {
    di "Winsorizing prices for module `pm'"
    
    foreach yr of numlist `year_min'/`year_max' {
        use ${data_dir}/Nielsen/movement/`pm'_`yr'_movement_weekly, clear
        
        * Drop unnecessary variables
        drop feature display panel_year product_group* department* dataset 
        
        * Rename and adjust variables
        gen quantity = units
        replace price = price/prmult
        
        * Group by UPC
        egen prod = group(upc upc_ver)
        
        * Winsorize prices at 1st and 99th percentile by UPC
        levelsof prod, local(levels)
        foreach uu of local levels {
            quietly count if prod == `uu'
            quietly sum price if prod == `uu', det
            quietly replace price = `r(p1)' if price <= `r(p1)' & prod == `uu'
            quietly replace price = `r(p99)' if price >= `r(p99)' & prod == `uu'
        }
        
        save ${data_dir}/Nielsen/movement/`pm'_`yr'_movement_weekly, replace
    }
}


/***************************************************************************
* 4.2 NORMALIZE PRICES & QUANTITIES, GENERATE SHARES AND INSTRUMENTS, ESTIMATE DEMAND
* - Normalizes prices and quantities to account for package size differences
* - Generates market shares and instruments for demand estimation
* - Estimates nested logit demand model
***************************************************************************/

foreach pm of numlist `module_list' {
    di "Estimating demand for module `pm'"
    clear
    
    * Combine data across years
    foreach yr of numlist `year_min'/`year_max' {
        append using ${data_dir}/Nielsen/movement/`pm'_`yr'_movement_weekly
    }
    
    * Merge with store information
    merge m:1 store_c year using ${data_dir}/Nielsen/stores/stores_allyrs.dta
    drop if _merge != 3
    drop _merge
    
    * Merge clustering results
    merge m:1 product_module_code upc upc_ver using ${data_dir}/Nielsen/panel/clustering_results_new_`pm'
    drop if _merge != 3
    drop _merge
    
    * Merge quarterly CPI for price deflation
    merge m:1 year quarter using ${data_dir}/supplemental/cpi_quarterly
    drop if _merge != 3
    drop _merge
    
    * Deflate prices to 2010 base
    gen temp = cpi if year == 2010 & quarter == 1
    egen ref_cpi = mean(temp)
    gen price_deflated = ref_cpi/cpi*price
    drop temp
    
    * Generate normalized prices and quantities
    gen double log_price = log(price_deflated)
    gen double log_size1_amount = log(size1_amount)
    replace size1_units = "MISSING" if size1_units == ""
    encode size1_units, gen(units_temp)
    
    * Regress log price on package characteristics
    reg log_price i.multi log_size1_amount i.units_temp
    predict log_price_xb, xb
    
    * Generate normalized price and quantity
    gen double price_normalized = price_deflated*exp(-log_price_xb)
    gen double quantity_normalized = quantity*exp(log_price_xb)
    drop log_* 
    
    * Generate Hausman instruments (avg price of UPC in other regions)
    * First encode geographic regions
    gen region_code = 4 if fips_state_desc == "ME" | fips_state_desc == "NY" | fips_state_desc == "PA" | fips_state_desc == "NJ" | fips_state_desc == "CT" | fips_state_desc == "RI" | fips_state_desc == "MA" | fips_state_desc == "NH" | fips_state_desc == "VT"
	replace region_code = 3 if fips_state_desc == "MD" | fips_state_desc == "DE" | fips_state_desc == "DC" | fips_state_desc == "VA" | fips_state_desc == "WV" | fips_state_desc == "KY" | fips_state_desc == "TN" | fips_state_desc == "NC" | fips_state_desc == "SC" | fips_state_desc == "GA" | fips_state_desc == "FL" | fips_state_desc == "AL" | fips_state_desc == "MS" | fips_state_desc == "LA" | fips_state_desc == "AR" | fips_state_desc == "OK" | fips_state_desc == "TX"
	replace region_code = 2 if fips_state_desc == "ND" | fips_state_desc == "SD" | fips_state_desc == "NE" | fips_state_desc == "KS" | fips_state_desc == "MN" | fips_state_desc == "IA" | fips_state_desc == "MO" | fips_state_desc == "WI" | fips_state_desc == "IL" | fips_state_desc == "MI" | fips_state_desc == "IN" | fips_state_desc == "OH"
	replace region_code = 1 if fips_state_desc == "WA" | fips_state_desc == "ID" | fips_state_desc == "MT" | fips_state_desc == "WY" | fips_state_desc == "OR" | fips_state_desc == "CA" | fips_state_desc == "NV" | fips_state_desc == "UT" | fips_state_desc == "CO" | fips_state_desc == "AZ" | fips_state_desc == "NM" | fips_state_desc == "AK" | fips_state_desc == "HI"
    
    * Calculate regional prices (excluding own DMA)
    egen tmp_reg = sum(quantity_normalized), by(date region_code upc upc_ver)
    egen tmp_both = sum(quantity_normalized), by(date region_code dma_code upc upc_ver) 
    gen tmp_denom = tmp_reg - tmp_both
    egen tmp_num_both = sum(price_normalized*quantity_normalized), by(date region_code dma_code upc upc_ver)
    egen tmp_num_reg = sum(price_normalized*quantity_normalized), by(date region_code upc upc_ver)
    gen tmp_num = tmp_num_reg - tmp_num_both
    gen z_hausman = tmp_num/tmp_denom if tmp_denom != 0 & tmp_denom > 0 & tmp_num > 0
    drop tmp_*
    
    * Count products per market and nest
    egen num_prods = count(price_norm), by(date store_c)
    egen num_prods_nest = count(price_norm), by(date store_c cluster_ids)
    egen num_prods_quarter = count(price_norm), by(year quarter store_c)
    egen num_prods_nest_quarter = count(price_norm), by(year quarter store_c cluster_ids)
    
    * Generate market shares
    egen inside_quant_norm = sum(quantity_normalized), by(store_code date)
    egen max_inside_qunat_norm = max(inside_quant_norm), by(store_code year)
    gen market_size = max_inside_qunat_norm*2
    gen shares = quantity_norm/market_size
    egen inside_share = sum(shares), by(store_code date)
    gen outside_share = 1-inside_share
    egen nest_quantity_norm = sum(quantity_normalized), by(store_code date cluster_ids)
    gen ln_sjg = log(quantity_norm/nest_quantity_norm)
    gen y_var = log(shares/outside_share)
    egen upc_fe = group(upc)
    egen dma_brand_quarter = group(dma_c brand_code_uc year quarter)
    
    * Run demand estimation year by year
    foreach yr of numlist `year_min'/`year_max' {
        preserve
        drop if year != `yr'
        egen market_ids = group(store_code week_end)
        
        * Instrumental variables regression with fixed effects
        ivreghdfe y_var (ln_sjg price_normalized = num_prods num_prods_nest z_hausman), absorb(upc_fe dma_brand_quarter) cluster(market_ids)
        
        * Save parameter estimates
        gen alpha_nl = _b[price_normalized]  
        gen sigma_nl = _b[ln_sjg] 
        gen se_alpha_nl = _se[price_normalized]  
        gen se_sigma_nl = _se[ln_sjg] 
        gen delta_nl = y_var - _b[ln_sjg]*ln_sjg     
        gen F = e(widstat)
        drop market_ids
        
        save ${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24, replace
        
        * Save demand parameters for later use
        keep alpha_nl sigma_nl se_alpha_nl se_sigma_nl product_module_*
        duplicates drop
        save ${data_dir}/Demand_Results/NL_`pm'_`yr'_demandparmas, replace
        
        * Clean up temporary files
        !rm ${data_dir}/Nielsen/movement/`pm'_`yr'_movement_weekly.dta
        restore
    }    
}



/***************************************************************************
* 4.3 GENERATE TABLES SUMMARIZING SAMPLE, CLUSTERING AND DEMAND ESTIMATION RESULTS
* - Creates Tables 1-6 and Appendix Tables 14-16, 22
***************************************************************************/

* Compile statistics across modules and years
foreach pm of numlist `module_list' {    
    foreach yr of numlist `year_min'/`year_max' {        
        di "Calculating statistics for module `pm', year `yr'"
        use ${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24.dta, clear
        
        * Calculate own-price elasticity and upc revenue 
        gen ope = alpha_nl*price_normalized*(1/(1-sigma_nl)-sigma_nl/(1-sigma_nl)*exp(ln_sjg)-shares)
        gen revenue_pooled = quantity * price
        
        * Calculate summary statistics
        egen out_revenue = sum(revenue_pooled)
        egen out_ope_mean = mean(ope)
        egen out_ope_mean_rw = wtmean(ope), weight(revenue_pooled)
        
        * Count stores and UPCs
        egen id_stores = group(store_code)
        egen id_prod = group(upc upc_ver)
        sum id_stores
        gen out_num_stores = `r(max)'
        sum id_prod
        gen out_num_upcs = `r(max)'
        
        * Save parameter estimates
        gen out_alpha = alpha_nl
        gen out_sigma = sigma_nl
        gen out_n_clusters = n_clusters
        
        * Keep only module-level statistics
        keep product_module_code product_module_descr year out_* 
        duplicates drop
        save ${data_dir}/Demand_Results/stats_`pm'_`yr', replace
    }
}

* Combine statistics across all modules and years
clear
foreach pm of numlist `module_list' {    
    foreach yr of numlist `year_min'/`year_max' {        
        di "Appending stats for module `pm', year `yr'"
        append using ${data_dir}/Demand_Results/stats_`pm'_`yr'        
    }
}
save ${data_dir}/Demand_Results/stats_allpmyr_weekly_05_24, replace            

* Prepare data for tables
use ${data_dir}/Demand_Results/stats_allpmyr_weekly_05_24, clear 
drop if year == 0 | out_alpha >=0        
gen out_revenue_mils = round(out_revenue/1000000, 0.1)        
reshape wide out*, i(product_module_*) j(year)
gen rev_ends = out_revenue2006 + out_revenue2018
gsort -rev_ends
gen rev_rank = _n
gen pd = proper(product_module_d)
replace pd = regexr(pd, "&", "\&")
    
* Table 1: Sample Description (10 Largest Product Markets)
di "Generating Table 1"
gsort -rev_ends
file open table1 using "${table_dir}/Table1.tex", write replace
file write table1 "\begin{tabular}{lrrrrrrr}"_n 
file write table1 "\toprule "_n 
file write table1 " & \multicolumn{2}{c}{Stores} & \multicolumn{2}{c}{UPCs} & \multicolumn{2}{c}{Revenues} & \tabularnewline \cmidrule(lr){2-3}\cmidrule(lr){4-5}\cmidrule(lr){6-7}"_n 
file write table1 " &\multicolumn{1}{l}{Product Module} & 2006 & 2018 & 2006 & 2018 & 2006 & 2018 &  Nests   \tabularnewline "_n 
file write table1 "\midrule  "_n 
file close table1

* Write top 10 product modules
local N = 10
forvalues i = 1(1)`N' {
    local pd = pd[`i']
    local v1 = (out_num_stores2006[`i'])
    local v2 = (out_num_stores2018[`i'])
    local v3 = (out_num_upcs2006[`i'])
    local v4 = (out_num_upcs2018[`i'])
    local v5 = (out_revenue_mils2006[`i'])
    local v6 = (out_revenue_mils2018[`i'])
    local v7 = (out_n_clusters2006[`i'])
    
    local fmtv1 : display %6.0fc `v1'
    local fmtv2 : display %6.0fc `v2'
    local fmtv3 : display %6.0fc `v3'
    local fmtv4 : display %6.0fc `v4'
    local fmtv5 : display %8.1fc `v5'
    local fmtv6 : display %8.1fc `v6'
    local fmtv7 : display %6.0fc `v7'
    
    file open table1 using "${table_dir}/Table1.tex", write append
    file write table1 "`pd' & `fmtv1' & `fmtv2' & `fmtv3' & `fmtv4'& `fmtv5' & `fmtv6' &`fmtv7'\\ "_n 
    file close table1
}

* Add median values
file open table1 using "${table_dir}/Table1.tex", write append
sum out_num_stores2006, d
local v1 = r(p50)
sum out_num_stores2018, d
local v2 = r(p50)
sum out_num_upcs2006, d
local v3 = r(p50)
sum out_num_upcs2018, d
local v4 = r(p50)
sum out_revenue_mils2006, d
local v5 = r(p50)
sum out_revenue_mils2018, d
local v6 = r(p50)
sum out_n_clusters2006, d
local v7 = r(p50)

local fmtv1 : display %6.0fc `v1'
local fmtv2 : display %6.0fc `v2'
local fmtv3 : display %6.0fc `v3'
local fmtv4 : display %6.0fc `v4'
local fmtv5 : display %8.1fc `v5'
local fmtv6 : display %8.1fc `v6'
local fmtv7 : display %6.0fc `v7'
file write table1 "\midrule  "_n 
file write table1 "Median & `fmtv1' & `fmtv2' & `fmtv3' & `fmtv4'& `fmtv5' & `fmtv6' &`fmtv7'\\ "_n 
file write table1 "\bottomrule\end{tabular} "_n     
file close table1

* Appendix Table A14 (Sample Description: All 75 Product Markets)
di "Generating Table A14"
sort pd 
file open tableA14 using "${table_dir}/TableA14.tex", write replace
file write tableA14 " \\\toprule"_n 
file write tableA14 "  & \multicolumn{2}{c}{Stores} & \multicolumn{2}{c}{UPCs} & \multicolumn{2}{c}{Revenues} & \tabularnewline\cmidrule(lr){2-3}\cmidrule(lr){4-5}\cmidrule(lr){6-7}"_n 
file write tableA14 "\multicolumn{1}{l}{Product Market} & 2006 & 2018 & 2006 & 2018 & 2006 & 2018 &    Nests \tabularnewline\midrule"_n 
file write tableA14 "\endfirsthead"_n
file write tableA14 "\toprule"_n 
file write tableA14 "& \multicolumn{2}{c}{Stores} & \multicolumn{2}{c}{UPCs} & \multicolumn{2}{c}{Revenues} & \tabularnewline\cmidrule(lr){2-3}\cmidrule(lr){4-5}\cmidrule(lr){6-7}"_n 
file write tableA14 " \multicolumn{1}{l}{Product Market}  & 2006 & 2018 & 2006 & 2018 & 2006 & 2018 &   Nests  \tabularnewline \midrule"_n
file write tableA14 "\endhead"_n 
file write tableA14 "\midrule \multicolumn{9}{r}{{Continued on next page}} \\ \bottomrule"_n
file write tableA14 "\endfoot"_n
file write tableA14 "\endlastfoot"_n
file write tableA14 "\hline"_n
file close tableA14

* Write all product modules
local N = _N
forvalues i = 1(1)`N' {
    local pd = pd[`i']
    local v1 = string(out_num_stores2006[`i'])
    local v2 = string(out_num_stores2018[`i'])
    local v3 = string(out_num_upcs2006[`i'])
    local v4 = string(out_num_upcs2018[`i'])
    local v5 = string(out_revenue_mils2006[`i'])
    local v6 = string(out_revenue_mils2018[`i'])
    local v7 = string(out_n_clusters2006[`i'])
    
    local fmtv1 : display %6.0fc `v1'
    local fmtv2 : display %6.0fc `v2'
    local fmtv3 : display %6.0fc `v3'
    local fmtv4 : display %6.0fc `v4'
    local fmtv5 : display %8.1fc `v5'
    local fmtv6 : display %8.1fc `v6'
    local fmtv7 : display %6.0fc `v7'
    
    file open tableA14 using "${table_dir}/TableA14.tex", write append
    file write tableA14 "`pd' & `fmtv1' & `fmtv2' & `fmtv3' & `fmtv4'& `fmtv5' & `fmtv6' &`fmtv7'\\ "_n
    file close tableA14
}
file open tableA14 using "${table_dir}/TableA14.tex", write append
file write tableA14 "\bottomrule"_n     
file close tableA14

* Table A15 (Demand Parameters: All 75 Product Markets)
di "Generating Table A15"
sort pd 
gen alpha_diff = out_alpha2018-out_alpha2006
gen sigma_diff = out_sigma2018-out_sigma2006
file open tableA15 using "${table_dir}/TableA15.tex", write replace
file write tableA15 "\\\toprule"_n
file write tableA15 "Product Market &  $\alpha_{2006}$ & $\alpha_{2012}$ &  $\alpha_{2018}$  &  $\sigma_{2006}$ & $\sigma_{2012}$ &  $\sigma_{2018}$  &   $\Delta\alpha$ &   $\Delta\sigma$  \tabularnewline  \midrule"_n
file write tableA15 "\endfirsthead"_n
file write tableA15 "\toprule"_n
file write tableA15 "Product Market &  $\alpha_{2006}$ & $\alpha_{2012}$ &  $\alpha_{2018}$  &  $\sigma_{2006}$ & $\sigma_{2012}$ &  $\sigma_{2018}$  &   $\Delta\alpha$  &   $\Delta\sigma$ \tabularnewline  \midrule"_n
file write tableA15 "\endhead "_n
file write tableA15 "\midrule \multicolumn{9}{r}{{Continued on next page}} \\ \bottomrule"_n
file write tableA15 "\endfoot"_n
file write tableA15 "\endlastfoot"_n
file write tableA15 "\hline"_n
file close tableA15

* Write parameter values for all product modules
local N = _N
forvalues i = 1(1)`N' {
    local pd = pd[`i']
    local v1 = round(out_alpha2006[`i'], 0.01)
    local v2 = round(out_alpha2012[`i'], 0.01)
    local v3 = round(out_alpha2018[`i'], 0.01)
    local v4 = round(out_sigma2006[`i'], 0.01)
    local v5 = round(out_sigma2012[`i'], 0.01)
    local v6 = round(out_sigma2018[`i'], 0.01)
    local v7 = round(alpha_diff[`i'], 0.01)
    local v8 = round(sigma_diff[`i'], 0.01)
    
    local fmtv1 : display %4.2f `v1'
    local fmtv2 : display %4.2f `v2'
    local fmtv3 : display %4.2f `v3'
    local fmtv4 : display %4.2f `v4'
    local fmtv5 : display %4.2f `v5'
    local fmtv6 : display %4.2f `v6'
    local fmtv7 : display %4.2f `v7'
    local fmtv8 : display %4.2f `v8'
    
    file open tableA15 using "${table_dir}/TableA15.tex", write append
    file write tableA15 "`pd' & `fmtv1' & `fmtv2' & `fmtv3' & `fmtv4'& `fmtv5' & `fmtv6' & `fmtv7'& `fmtv8'\\ "_n 
    file close tableA15
}
file open tableA15 using "${table_dir}/TableA15.tex", write append    
file write tableA15 "\bottomrule"_n     
file close tableA15

* Table 5 (Demand Parameters: Top 10 Product Markets)
di "Generating Table 5"
gsort -rev_ends 
file open table5 using "${table_dir}/Table5.tex", write replace
file write table5 "\begin{tabular}{lcccccccc}"_n
file write table5 "\toprule"_n 
file write table5 "Product Module &  $\alpha_{2006}$ & $\alpha_{2012}$ &  $\alpha_{2018}$  &  $\sigma_{2006}$ & $\sigma_{2012}$ &  $\sigma_{2018}$  &   {$\Delta\alpha$}  &   {$\Delta\sigma$}  \tabularnewline"_n  
file write table5 "\midrule"_n
file close table5

* Write parameter values for top 10 modules (excluding certain modules)
local N = 10
forvalues i = 1(1)`N' {
    if inlist(product_module_code[`i'], 1042, 1303, 1309, 1498, 7460) == 0 {
        
        local pd = pd[`i']
        local v1 = round(out_alpha2006[`i'], 0.01)
        local v2 = round(out_alpha2012[`i'], 0.01)
        local v3 = round(out_alpha2018[`i'], 0.01)
        local v4 = round(out_sigma2006[`i'], 0.01)
        local v5 = round(out_sigma2012[`i'], 0.01)
        local v6 = round(out_sigma2018[`i'], 0.01)
        local v7 = round(alpha_diff[`i'], 0.01)
        local v8 = round(sigma_diff[`i'], 0.01)
        
        local fmtv1 : display %4.2f `v1'
        local fmtv2 : display %4.2f `v2'
        local fmtv3 : display %4.2f `v3'
        local fmtv4 : display %4.2f `v4'
        local fmtv5 : display %4.2f `v5'
        local fmtv6 : display %4.2f `v6'
        local fmtv7 : display %4.2f `v7'
        local fmtv8 : display %4.2f `v8'
        
        file open table5 using "${table_dir}/Table5.tex", write append
        file write table5 "`pd' & `fmtv1' & `fmtv2' & `fmtv3' & `fmtv4'& `fmtv5' & `fmtv6' & `fmtv7'& `fmtv8'\\ "_n 
        file close table5
    }
}

* Add median values
file open table5 using "${table_dir}/Table5.tex", write append
sum out_alpha2006, d
local v1 = r(p50)
sum out_alpha2012, d
local v2 = r(p50)
sum out_alpha2018, d
local v3 = r(p50)
sum out_sigma2006, d
local v4 = r(p50)
sum out_sigma2012, d
local v5 = r(p50)
sum out_sigma2018, d
local v6 = r(p50)
sum alpha_diff, d
local v7 = r(p50)
sum sigma_diff, d
local v8 = r(p50)

local fmtv1 : display %4.2f `v1'
local fmtv2 : display %4.2f `v2'
local fmtv3 : display %4.2f `v3'
local fmtv4 : display %4.2f `v4'
local fmtv5 : display %4.2f `v5'
local fmtv6 : display %4.2f `v6'
local fmtv7 : display %4.2f `v7'
local fmtv8 : display %4.2f `v8'
file write table5 "\midrule  "_n 
file write table5 "Median & `fmtv1' & `fmtv2' & `fmtv3' & `fmtv4'& `fmtv5' & `fmtv6' &`fmtv7'&`fmtv8'\\ "_n 
file write table5 "\bottomrule\end{tabular} "_n     
file close table5

* Table 6 (Own-Price Elasticity: Top 10 Product Markets)
di "Generating Table 6"
gsort -rev_ends 
file open table6 using "${table_dir}/Table6.tex", write replace
file write table6 "\begin{tabular}{lrrrrrrrr}"_n
file write table6 "\hline "_n
file write table6 "\multicolumn{1}{l}{Product Module} & \multicolumn{4}{c}{Unweighted} & \multicolumn{4}{c}{Sales-Weighted}   \tabularnewline"_n
file write table6 "   & 2006 & 2010 & 2014 & 2018  & 2006 & 2010 & 2014 & 2018  \tabularnewline  \hline"_n
file write table6 "\hline"_n 
file close table6

* Write elasticities for top 10 modules (excluding certain modules)
local N = 10
forvalues i = 1(1)`N' {
    if inlist(product_module_code[`i'], 1042, 1303, 1309, 1498, 7460) == 0 {
        local pd = pd[`i']
        local v1 = (round(out_ope_mean2006[`i'], 0.01))
        local v2 = (round(out_ope_mean2010[`i'], 0.01))
        local v3 = (round(out_ope_mean2014[`i'], 0.01))
        local v4 = (round(out_ope_mean2018[`i'], 0.01))
        local v5 = (round(out_ope_mean_rw2006[`i'], 0.01))
        local v6 = (round(out_ope_mean_rw2010[`i'], 0.01))
        local v7 = (round(out_ope_mean_rw2014[`i'], 0.01))
        local v8 = (round(out_ope_mean_rw2018[`i'], 0.01))
        
        local fmtv1 : display %4.2f `v1'
        local fmtv2 : display %4.2f `v2'
        local fmtv3 : display %4.2f `v3'
        local fmtv4 : display %4.2f `v4'
        local fmtv5 : display %4.2f `v5'
        local fmtv6 : display %4.2f `v6'
        local fmtv7 : display %4.2f `v7'
        local fmtv8 : display %4.2f `v8'
            
        file open table6 using "${table_dir}/Table6.tex", write append
        file write table6 "`pd' & `fmtv1' & `fmtv2' & `fmtv3' & `fmtv4'& `fmtv5' & `fmtv6' &`fmtv7'&`fmtv8'\\ "_n 
        file close table6
    }    
}

* Add median values
file open table6 using "${table_dir}/Table6.tex", write append
sum out_ope_mean2006 if inlist(product_module_code, 1042, 1303, 1309, 1498, 7460) == 0, d
local v1 = r(p50)
sum out_ope_mean2010 if inlist(product_module_code, 1042, 1303, 1309, 1498, 7460) == 0, d
local v2 = r(p50)
sum out_ope_mean2014 if inlist(product_module_code, 1042, 1303, 1309, 1498, 7460) == 0, d
local v3 = r(p50)
sum out_ope_mean2018 if inlist(product_module_code, 1042, 1303, 1309, 1498, 7460) == 0, d
local v4 = r(p50)
sum out_ope_mean_rw2006 if inlist(product_module_code, 1042, 1303, 1309, 1498, 7460) == 0, d
local v5 = r(p50)
sum out_ope_mean_rw2010 if inlist(product_module_code, 1042, 1303, 1309, 1498, 7460) == 0, d
local v6 = r(p50)
sum out_ope_mean_rw2014 if inlist(product_module_code, 1042, 1303, 1309, 1498, 7460) == 0, d
local v7 = r(p50)
sum out_ope_mean_rw2018 if inlist(product_module_code, 1042, 1303, 1309, 1498, 7460) == 0, d
local v8 = r(p50)

local fmtv1 : display %4.2f `v1'
local fmtv2 : display %4.2f `v2'
local fmtv3 : display %4.2f `v3'
local fmtv4 : display %4.2f `v4'
local fmtv5 : display %4.2f `v5'
local fmtv6 : display %4.2f `v6'
local fmtv7 : display %4.2f `v7'
local fmtv8 : display %4.2f `v8'
file write table6 "\midrule  "_n 
file write table6 "Median & `fmtv1' & `fmtv2' & `fmtv3' & `fmtv4'& `fmtv5' & `fmtv6' &`fmtv7'&`fmtv8'\\ "_n 
file write table6 "\bottomrule\end{tabular} "_n     
file close table6

* Appendix Table A16 (Own-Price Elasticity: 70 Product Markets)
di "Generating Table A16"
sort pd 
file open tableA16 using "${table_dir}/TableA16.tex", write replace
file write tableA16 " \\\toprule"_n 
file write tableA16 " \multicolumn{1}{l}{Product Market} & \multicolumn{4}{c}{Unweighted} & \multicolumn{4}{c}{Revenue-Weighted}   \tabularnewline\cmidrule(lr){2-5}\cmidrule(lr){6-9}"_n
file write tableA16 "   & 2006 & 2010 & 2014 & 2018  & 2006 & 2010 & 2014 & 2018  \tabularnewline  \midrule"_n
file write tableA16 "\endfirsthead"_n
file write tableA16 "\toprule"_n
file write tableA16 "  \multicolumn{1}{l}{Product Market} & \multicolumn{4}{c}{Unweighted} & \multicolumn{4}{c}{Revenue-Weighted}   \tabularnewline\cmidrule(lr){2-5}\cmidrule(lr){6-9}"_n
file write tableA16 "  & 2006 & 2010 & 2014 & 2018  & 2006 & 2010 & 2014 & 2018  \tabularnewline  \midrule"_n
file write tableA16 "\endhead "_n
file write tableA16 "\midrule \multicolumn{9}{r}{{Continued on next page}} \\ \bottomrule"_n
file write tableA16 "\endfoot"_n
file write tableA16 "\endlastfoot"_n
file close tableA16

* Write elasticities for all modules (excluding certain modules)
local N = _N
forvalues i = 1(1)`N' {
    if inlist(product_module_code[`i'], 1042, 1303, 1309, 1498, 7460) == 0 {
        local pd = pd[`i']
        local v1 = (round(out_ope_mean2006[`i'], 0.01))
        local v2 = (round(out_ope_mean2010[`i'], 0.01))
        local v3 = (round(out_ope_mean2014[`i'], 0.01))
        local v4 = (round(out_ope_mean2018[`i'], 0.01))
        local v5 = (round(out_ope_mean_rw2006[`i'], 0.01))
        local v6 = (round(out_ope_mean_rw2010[`i'], 0.01))
        local v7 = (round(out_ope_mean_rw2014[`i'], 0.01))
        local v8 = (round(out_ope_mean_rw2018[`i'], 0.01))
        
		local fmtv1 : display %4.2f `v1'
        local fmtv2 : display %4.2f `v2'
        local fmtv3 : display %4.2f `v3'
        local fmtv4 : display %4.2f `v4'
        local fmtv5 : display %4.2f `v5'
        local fmtv6 : display %4.2f `v6'
        local fmtv7 : display %4.2f `v7'
        local fmtv8 : display %4.2f `v8'
        
        file open tableA16 using "${table_dir}/TableA16.tex", write append
        file write tableA16 "`pd' & `fmtv1' & `fmtv2' & `fmtv3' & `fmtv4'& `fmtv5' & `fmtv6' &`fmtv7'&`fmtv8'\\ "_n 
        file close tableA16
    }
}

file open tableA16 using "${table_dir}/TableA16.tex", write append
file write tableA16 "\bottomrule"_n
file close tableA16

* Prepare UPC-Level Data By Module And Year For Tables on Clustering Results

foreach pm of numlist `module_list'{    
    foreach yr of numlist `year_min'/`year_max'{        
    
        disp(`pm')
        disp(`yr')
        use ${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24, clear
                
        gen revenue_pooled = quantity * price
        keep upc upc_ver upc_descr revenue_pooled quantity year cluster_ids n_clusters
        
        collapse (sum) revenue_pooled (sum)  quantity , by(year upc upc_ver upc_d cluster_ids n_clusters)

        save ${data_dir}/Demand_Results/clusterupc_`pm'_`yr'.dta, replace
    }
}


* TABLE 2: CLUSTERING RESULTS FOR CEREAL 
* Note: to make table legible and condense space, output of table 2 code below 
* was manually reformated (including reordering the numbering of the cluster_ids)

clear    
    
local sub_module_list "1344"
file open table2 using "${table_dir}/Table2.tex", write replace

file write table2 "\\\hline "_n
file write table2 "Nest & Brand &  UPC Descr. & Size & Units  &  Multi  \tabularnewline  \hline"_n
file write table2 "\endfirsthead"_n
file write table2 "\hline "_n
file write table2 "Nest & Brand &  UPC Descr. & Size & Units  &  Multi  \tabularnewline  \hline"_n
file write table2 "\endhead "_n
file write table2 "\hline \multicolumn{6}{r}{{Continued on next page}} \\ \hline"_n
file write table2 "\endfoot"_n
file write table2 "\endlastfoot"_n
file write table2 "\hline"_n

file close table2

foreach pm of numlist `sub_module_list'{    
    clear
    foreach yr of numlist `year_min'/`year_max'{
        append using ${data_dir}/Demand_Results/clusterupc_`pm'_`yr'.dta, generate(newv_`pm'_`yr')
        
    }
    gen product_module_code = `pm'    
    merge m:1 product_module_code upc upc_ver  using  ${data_dir}/Nielsen/movement/products/products_`pm'
    drop if _merge !=3
    drop _merge
    egen tot_quantity = sum(quantity), by(product_module_code upc upc_ver upc_descr)
    gen avg_q = tot_quantity/13
    duplicates drop product_module_code  upc upc_ver, force
    gsort product_module_code cluster_ids -avg_q
    bys  product_module_code  cluster_ids: gen rank = _n
    gen pd = proper(product_module_d)
    replace pd = regexr(pd,"&","\&")
    gen s1u = lower(size1_units)
    gen bd = proper(brand_d)
    replace bd = subinstr(bd,"&","\&",.)
    replace bd = subinstr(bd,"%","\%",.)
    replace bd = subinstr(bd,"'S","'s",.)
    gen upcd = upc_descr
    replace upcd = subinstr(upcd,"&","\&",.)
    replace upcd = subinstr(upcd,"%","\%",.)
    keep if avg_q > 300000
    sort product_module_c cluster_id  brand_d upc_descr size1_a 
    keep pd bd cluster_id 
    duplicates drop
    replace bd = regexr(bd,"Qkr ","")
    replace bd = regexr(bd,"G M ","")
    replace bd = regexr(bd,"Kel ","")
    replace bd = regexr(bd,"Post ","")
    local pd = pd[1]
    file open table2 using "${table_dir}/Table2.tex", write append
    file write table2 "\hline"_n
    file write table2 "\multicolumn{6}{l}{`pd'}\\ "_n 
    file write table2 "\hline"_n
    file close table2
    sort cluster_id bd
    local N = _N
    forvalues i = 1(1)`N'{
        local v1 = cluster_id[`i'] 
        local v2 = bd[`i'] 
        local fmtv4 : display %16.2g `v4'
        file open table2 using "${table_dir}/Table2.tex", write append

        file write table2 "`v1' & `v2' \\ "_n 
        file close table2
    }
    file open table2 using "${table_dir}/Table2.tex", write append
    file write table2 "\hline"_n
    file close table2
}


* TABLE 3: CLUSTERING RESULTS FOR SODA
* Note: to make table legible and condense space, output of table 3 code below 
* was manually reformated (including reordering the numbering of the cluster_ids)

clear    
local sub_module_list "1484"
          
file open table3 using "${table_dir}/Table3.tex", write replace

file write table3 "\\\hline "_n
file write table3 "Nest & Brand &  UPC Descr. & Size & Units  &  Multi  \tabularnewline  \hline"_n
file write table3 "\endfirsthead"_n
file write table3 "\hline "_n
file write table3 "Nest & Brand &  UPC Descr. & Size & Units  &  Multi  \tabularnewline  \hline"_n
file write table3 "\endhead "_n
file write table3 "\hline \multicolumn{6}{r}{{Continued on next page}} \\ \hline"_n
file write table3 "\endfoot"_n
file write table3 "\endlastfoot"_n
file write table3 "\hline"_n

file close table3

foreach pm of numlist `sub_module_list'{    
    clear
    foreach yr of numlist `year_min'/`year_max'{
        append using ${data_dir}/Demand_Results/clusterupc_`pm'_`yr'.dta, generate(newv_`pm'_`yr')
        
    }
    gen product_module_code = `pm'    
    merge m:1 product_module_code upc upc_ver  using  ${data_dir}/Nielsen/movement/products/products_`pm'
    drop if _merge !=3
    drop _merge
    egen tot_rev = sum(revenue_pooled), by(product_module_code upc upc_ver upc_descr)
    duplicates drop product_module_code  upc upc_ver, force
    gsort product_module_code cluster_ids -tot_rev
    bys  product_module_code  cluster_ids: gen rank = _n
    gen pd = proper(product_module_d)
    replace pd = regexr(pd,"&","\&")
    gen s1u = lower(size1_units)
    gen bd = proper(brand_d)
    replace bd = subinstr(bd,"&","\&",.)
    replace bd = subinstr(bd,"%","\%",.)
    replace bd = subinstr(bd,"'S","'s",.)
    gen upcd = upc_descr
    replace upcd = subinstr(upcd,"&","\&",.)
    replace upcd = subinstr(upcd,"%","\%",.)
    keep if rank <=5
    sort product_module_c cluster_id  brand_d upc_descr size1_a 

    local pd = pd[1]
    file open table3 using "${table_dir}/Table3.tex", write append
    file write table3 "\hline"_n
    file write table3 "\multicolumn{6}{l}{`pd'}\\ "_n 
    file write table3 "\hline"_n
    file close table3

    local N = _N
    forvalues i = 1(1)`N'{
        local v1 = cluster_id[`i'] 
        local v2 = bd[`i'] 
        local v3 = upcd[`i'] 
        local v4 = (round(size1_amount[`i'] ,0.01))
        local v5 = s1u[`i'] 
        local v6 = multi[`i']
        
        local fmtv4 : display %16.2g `v4'
        file open table3 using "${table_dir}/Table3.tex", write append

        file write table3 "`v1' & `v2' & `v3' & `fmtv4'& `v5' & `v6' \\ "_n 
        file close table3
    }
    file open table3 using "${table_dir}/Table3.tex", write append
    file write table3 "\hline"_n
    file close table3
}


* TABLE 4: CLUSTERING RESULTS FOR BEER
* Note: to make table legible and condense space, output of table 4 code below 
* was manually reformated (including reordering the numbering of the cluster_ids)

clear    
    
local sub_module_list "5000"                

file open table4 using "${table_dir}/Table4.tex", write replace

file write table4 "\\\hline "_n
file write table4 "Nest & Brand &  UPC Descr. & Size & Units  &  Multi  \tabularnewline  \hline"_n
file write table4 "\endfirsthead"_n
file write table4 "\hline "_n
file write table4 "Nest & Brand &  UPC Descr. & Size & Units  &  Multi  \tabularnewline  \hline"_n
file write table4 "\endhead "_n
file write table4 "\hline \multicolumn{6}{r}{{Continued on next page}} \\ \hline"_n
file write table4 "\endfoot"_n
file write table4 "\endlastfoot"_n
file write table4 "\hline"_n

file close table4

foreach pm of numlist `sub_module_list'{    
    clear
    foreach yr of numlist `year_min'/`year_max'{
        append using ${data_dir}/Demand_Results/clusterupc_`pm'_`yr'.dta, generate(newv_`pm'_`yr')
        
    }
    gen product_module_code = `pm'    
    merge m:1 product_module_code upc upc_ver  using  ${data_dir}/Nielsen/movement/products/products_`pm'
    drop if _merge !=3
    drop _merge
    egen tot_quantity = sum(quantity), by(product_module_code upc upc_ver upc_descr)
    gen avg_q = tot_quantity/13
    duplicates drop product_module_code  upc upc_ver, force
    gsort product_module_code cluster_ids -avg_q
    bys  product_module_code  cluster_ids: gen rank = _n
    gen pd = proper(product_module_d)
    replace pd = regexr(pd,"&","\&")
    gen s1u = lower(size1_units)
    gen bd = proper(brand_d)
    replace bd = subinstr(bd,"&","\&",.)
    replace bd = subinstr(bd,"%","\%",.)
    replace bd = subinstr(bd,"'S","'s",.)
    gen upcd = upc_descr
    replace upcd = subinstr(upcd,"&","\&",.)
    replace upcd = subinstr(upcd,"%","\%",.)
    keep if avg_q > 100000
    sort product_module_c cluster_id  brand_d upc_descr size1_a 
    keep pd bd cluster_id 
    duplicates drop
    local pd = pd[1]
    file open table4 using "${table_dir}/Table4.tex", write append
    file write table4 "\hline"_n
    file write table4 "\multicolumn{6}{l}{`pd'}\\ "_n 
    file write table4 "\hline"_n
    file close table4
    sort cluster_id bd
    local N = _N
    forvalues i = 1(1)`N'{
        local v1 = cluster_id[`i'] 
        local v2 = bd[`i']     
        local fmtv4 : display %16.2g `v4'
        file open table4 using "${table_dir}/Table4.tex", write append

        file write table4 "`v1' & `v2' \\ "_n 
        file close table4
    }
    file open table4 using "${table_dir}/Table4.tex", write append
    file write table4 "\hline"_n
    file close table4
}



* APPENDIX TABLE A22 (PRODUCT NESTS FOR ALL MODULES)

foreach pm of numlist `module_list'{    
    clear
    foreach yr of numlist `year_min'/`year_max'{
        append using ${data_dir}/Demand_Results/clusterupc_`pm'_`yr'.dta, generate(newv_`pm'_`yr')
        
    }
    gen product_module_code = `pm'    
    merge m:1 product_module_code upc upc_ver  using ${data_dir}/Nielsen/movement/products/products_`pm'
    drop if _merge !=3
    drop _merge
    egen tot_rev = sum(revenue_pooled), by(product_module_code upc upc_ver upc_descr)
    duplicates drop product_module_code  upc upc_ver, force
    gsort product_module_code cluster_ids -tot_rev
    bys  product_module_code  cluster_ids: gen rank = _n
    gen pd = proper(product_module_d)
    replace pd = regexr(pd,"&","\&")
    gen s1u = lower(size1_units)
    gen bd = proper(brand_d)
    replace bd = subinstr(bd,"&","\&",.)
    replace bd = subinstr(bd,"%","\%",.)
    replace bd = subinstr(bd,"'S","'s",.)
    gen upcd = upc_descr
    replace upcd = subinstr(upcd,"&","\&",.)
    replace upcd = subinstr(upcd,"%","\%",.)
    keep if rank <=5
    sort product_module_d cluster_id rank
    save ${data_dir}/`pm'_clus_topprod
}
    

* Combine top products across all modules
clear
foreach pm of numlist `module_list'{
    append using ${data_dir}/`pm'_clus_topprod
}

* Create Appendix Table A22
file open tableA22 using "${table_dir}/TableA22.tex", write replace
file write tableA22 "\\\toprule"_n
file write tableA22 "Nest & Brand &  UPC Description & Size & Units  &  Multi  \tabularnewline  \midrule"_n
file write tableA22 "\endfirsthead"_n
file write tableA22 "\toprule"_n
file write tableA22 "Nest & Brand &  UPC Description & Size & Units  &  Multi  \tabularnewline  \midrule"_n
file write tableA22 "\endhead "_n
file write tableA22 "\midrule \multicolumn{6}{r}{{Continued on next page}} \\ \bottomrule"_n
file write tableA22 "\endfoot"_n
file write tableA22 "\endlastfoot"_n
file close tableA22

local N = _N
sort product_module_d cluster_id rank
forvalues i = 1(1)`N'{
    if     pd[`i'] != pd[`i'-1]{
        local pd = pd[`i']
        file open tableA22 using "${table_dir}/TableA22.tex", write append
        file write tableA22 "\hline"_n
        file write tableA22 "\multicolumn{6}{l}{`pd'}\\ "_n 
        file write tableA22 "\hline"_n
        file close tableA22
    }
    local v1 = cluster_id[`i'] 
    local v2 = bd[`i'] 
    local v3 = upcd[`i'] 
    local v4 = (round(size1_amount[`i'] ,0.01))
    local v5 = s1u[`i'] 
    local v6 = multi[`i']
    
    
    local fmtv4 : display %16.2g `v4'
    file open tableA22 using "${table_dir}/TableA22.tex", write append

    file write tableA22 "`v1' & `v2' & `v3' & `fmtv4'& `v5' & `v6' \\ "_n 
    file close tableA22
    if     pd[`i'] != pd[`i'+1]{
        file open tableA22 using "${table_dir}/TableA22.tex", write append
        file write tableA22 "\bottomrule"_n
        file close tableA22
    }    
}


/***************************************************************************
* 5. ESTIMATE MARGINAL COSTS, COMPUTE MARKUPS, AND GENERATE APPENDIX TABLE 17, FIGURES 1, 2, 4, AND APPENDIX FIGURE 9
* - Calculates markups using estimated demand parameters
* - Generates tables and figures summarizing results
***************************************************************************/

* Reset the module list to only include 70 modules with valid demand parameters
local module_list "1030 1044 1050 1105 1114 1120 1177 1257 1290 1293 1320 1323 1326 1327 1330 1331 1334 1340 1341 1344 1360 1362 1425 1441 1452 1455 1461 1463 1484 1487 1493 1508 1532 1553 2621 2623 2624 2629 2631 2634 2672 2675 3550 3561 3572 3574 3576 3577 3578 3583 3590 3592 3603 3605 3618 3625 4000 4001 4002 4003 4004 4005 4006 5000 5010 5050 7012 7260 7340 7734"


* Compute Marginal Costs with PyBLP
foreach pm of numlist `module_list'{    
    foreach yr of numlist `year_min'/`year_max'{                    
        disp(`pm')
        disp(`yr')
        capture confirm  file ${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24.dta
        if _rc == 0{
            use ${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24, clear
            capture confirm new variable costs_nl
            if _rc==0 {
                duplicates tag upc upc_ver week store_code_uc, gen(dup)
                egen sum_dup = sum(dup)
                if sum_dup[1] != 0{
                    egen t_p_n = wtmean(price_normalized), by(upc upc_ver week store_code_uc) weight(quantity_normalized)
                    egen t_s_n = sum(shares), by(upc upc_ver week store_code_uc) 
                    egen t_q_n = sum(quantity_normalized), by(upc upc_ver week store_code_uc)
                    replace price_normalized = t_p_n
                    replace shares = t_s_n
                    replace quantity_normalized = t_q_n 
                    duplicates drop upc upc_ver week store_code_uc, force
                }
                egen market_ids = group(store_code week_end)    
                merge m:1 product_module_code brand_descr using "${data_dir}/supplemental/brand_lookup.dta"
                drop if _merge == 2
                replace parent_`yr' = -brand_code if _merge ==1
                egen firm_ids = group(parent_`yr')
                keep store_code week_end upc upc_ver  market_ids firm_ids     cluster_ids  price_normalized shares    sigma_nl   alpha_nl delta_nl

                rename price_normalized prices
                rename shares shares
                rename alpha_nl alpha
                rename sigma_nl sigma
                rename delta_nl delta
                rename cluster_ids nesting_ids
                format upc %16.0g
                outsheet using ${data_dir}/Demand_Results/costs_`pm'_`yr'.csv, comma replace
                !python3 ${data_dir}/supplemental/computing_costs.py  `pm' `yr' "${data_dir}/Demand_Results/costs_"
                clear 
                insheet using  ${data_dir}/Demand_Results/costs_`pm'_`yr'.csv
                keep store_code upc upc_ver week_end costs
                rename costs costs_nl
                merge 1:m  store_code upc upc_ver week_end using ${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24
                save ${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24, replace
                !rm ${data_dir}/Demand_Results/costs_`pm'_`yr'.csv
                }
            }
        }
}


* APPENDIX FIGURE 6: MARKUP CHANGES VS PARAMETER CHANGES
clear

local sub_year_list "2006 2018"
foreach pm of numlist `module_list'{    
    foreach yr of numlist `sub_year_list'{    
    
        use ${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24.dta, clear    
        gen revenue_pooled = quantity * price
		gen ope = alpha_nl*price_normalized*(1/(1-sigma_nl)-sigma_nl/(1-sigma_nl)*exp(ln_sjg)-shares)
		egen out_ope_mean = mean(ope)
		egen out_ope_mean_rw = wtmean(ope), weight(revenue_pooled)
        gen markups = (price_normalized - costs_nl)/(price_normalized)
        gen out_alpha = alpha_nl
        gen out_sigma = sigma_nl
        summarize costs_nl [weight = revenue_pooled], d
        gen out_costs_rw_mean = `r(mean)'
        summarize markups [weight = revenue_pooled], d
        gen out_markups_rw_mean = `r(mean)'
		summarize markups, d
		gen out_markups_mean = `r(mean)'
        keep product_module_code product_module_descr year out_*
        duplicates drop
        save ${data_dir}/Demand_Results/stats_`pm'_`yr'_costs, replace
    }
}
clear 
* Combine statistics across modules and years
local sub_year_list "2006 2018"

foreach pm of numlist `module_list'{    
    foreach yr of numlist `sub_year_list'{    
        append using  ${data_dir}/Demand_Results/stats_`pm'_`yr'_costs
    }
}

* Reshape to wide format for analysis
keep if year == 2006 | year == 2018
keep year product_module_code product_module_descr out_markups_rw_mean  out_alpha out_sigma out_costs_rw_mean
rename out_markups_rw_mean  avg_markups
rename out_alpha alpha 
rename out_sigma sigma
rename out_costs_rw_mean  avg_costs
reshape wide  avg_markups avg_costs alpha sigma   , i(product_module_code product_module_d ) j(year)

* Calculate changes from 2006 to 2018
gen markup_change=avg_markups2018-avg_markups2006
gen alpha_change=alpha2018-alpha2006
gen sigma_change=sigma2018-sigma2006
gen mc_change=log(avg_costs2018/avg_costs2006)
replace product_module_descr=proper(product_module_descr)
spearman markup_change alpha_change sigma_change mc_change

* Create scatterplots for three relationships
graph twoway (scatter markup_change alpha_change if abs(alpha_change)<5 & abs(markup_change)<2 , msym("x") msize(vsmall) mcolor(black) xline(0, lpattern(dash)) yline(0, lpattern(dash))   ), ytitle("{stSerif:Change in Markups}")  xtitle("{stSerif:Change in } {&alpha}") legend(off) xlab(-4(2)4) ylab(-1(.5)1) aspectratio(.8)  name(graphalpha, replace)
    
graph twoway (scatter markup_change sigma_change if abs(sigma_change)<5 & abs(markup_change)<2 , msym("x") msize(vsmall) mcolor(black) xline(0, lpattern(dash)) yline(0, lpattern(dash))   ), ytitle("{stSerif:Change in Markups}")  xtitle("{stSerif:Change in } {&sigma}") legend(off) xlab(-.2(.2).4) ylab(-1(.5)1) aspectratio(.8) name(graphsigma, replace)
    
graph twoway (scatter markup_change mc_change if abs(mc_change)<10 & abs(markup_change)<4, msym("x") msize(vsmall) mcolor(black) xline(0, lpattern(dash)) yline(0, lpattern(dash))), ytitle("{stSerif:Change in Markups}")  xtitle("{stSerif:Log Change in Marginal Costs}") legend(off) xlab(-2(.5).5) ylab(-1(.5)1) aspectratio(.8) name(graphmc, replace)

* Combine into a single figure
graph combine graphalpha graphsigma graphmc, rows(2) cols(2) graphregion(color(white)) note("") b2title("") l2title("") ycommon imargin(0 0 0 0) graphregion(margin(l=20 r=20))
graph export ${table_dir}/AppendixFigure6.eps, replace


* APPENDIX FIGURE 7: ELASTICITY AND MARKUP COMPARISONS
clear
local sub_year_list "2006 2018"

foreach pm of numlist `module_list'{    
    foreach yr of numlist `sub_year_list'{    
        append using  ${data_dir}/Demand_Results/stats_`pm'_`yr'_costs
    }
}

reshape wide out_*, i(product_module_*) j(year)

* Elasticity comparison
graph twoway (scatter out_ope_mean2018 out_ope_mean2006 if out_ope_mean2018>-10 & out_markups_mean2018<1 & out_markups_mean2006<1  , msym("x") msize(vsmall) mcolor(black) ) (function y = x, range(-8 0) lpattern(dash) lcolor(black)) , ytitle("{stSerif:Mean OPE 2018}")  xtitle("{stSerif:Mean OPE 2006}") legend(off) xlab(-10(2)0) ylab(-8(2)0) aspectratio(.8)  name(graphalpha, replace)
graph export ${table_dir}/ope_scatter.eps, replace

* Markup comparison
graph twoway (scatter out_markups_mean2018 out_markups_mean2006 if out_ope_mean2018>-10 & out_markups_mean2018<1 & out_markups_mean2006<1 , msym("x") msize(vsmall) mcolor(black) )  (function y = x, range(.2 0.8) lpattern(dash) lcolor(black)) , ytitle("{stSerif:Mean Markups 2018}")  xtitle("{stSerif:Mean Markups 2006}") legend(off) xlab(.2(.2).8) ylab(.2(.2)1)  aspectratio(.8)  
graph export ${table_dir}/mkp_scatter.eps, replace

* Compute markups and process by module/year
foreach pm of numlist `module_list' {    
    foreach yr of numlist `year_min'/`year_max' {        
        di "Computing markups for module `pm', year `yr'"
        use ${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24.dta, clear    
        
        * Calculate own-price elasticity and markups
        gen ope = alpha_nl*price_normalized*(1/(1-sigma_nl)-sigma_nl/(1-sigma_nl)*exp(ln_sjg)-shares)
        gen revenue_pooled = quantity * price
        gen markups = (price_normalized - costs_nl)/(price_normalized)
        
        * Keep necessary variables
        keep product_module_code product_module_descr year revenue_pooled markups ope price_normalized costs_nl
        save ${data_dir}/Demand_Results/tmp_stats_`pm'_`yr'_05_24, replace
    }
}

* Calculate markup statistics by module/year
foreach pm of numlist `module_list' {    
    foreach yr of numlist `year_min'/`year_max' {        
        di "Calculating markup stats for module `pm', year `yr'"
        use ${data_dir}/Demand_Results/tmp_stats_`pm'_`yr'_05_24.dta, clear    
        
        * Calculate total revenue
        egen out_revenue = sum(revenue_pooled)
        
        * Calculate markup distribution (unweighted)
        summarize markups, d
        gen out_markups_25 = `r(p25)'
        gen out_markups_50 = `r(p50)'
        gen out_markups_75 = `r(p75)'
        gen out_markups_mean = `r(mean)'
        
        * Calculate markup distribution (revenue-weighted)
        summarize markups [weight = revenue_pooled], d
        gen out_markups_rw_25 = `r(p25)'
        gen out_markups_rw_50 = `r(p50)'
        gen out_markups_rw_75 = `r(p75)'
        gen out_markups_rw_mean = `r(mean)'    
        
        * Keep module-level statistics
        keep product_module_code product_module_descr year out_*
        duplicates drop
        save ${data_dir}/Demand_Results/mkp_stats_`pm'_`yr', replace
    }
}

* Combine markup statistics across all modules and years
clear
foreach pm of numlist `module_list' {    
    foreach yr of numlist `year_min'/`year_max' {        
        di "Appending markup stats for module `pm', year `yr'"
        append using ${data_dir}/Demand_Results/mkp_stats_`pm'_`yr'        
    }
}
save ${data_dir}/Demand_Results/mkp_stats_allpmyr_weekly_05_24, replace            

* Appendix Table 17 (Markups: 70 Product Markets)
di "Generating Table A17"
use ${data_dir}/Demand_Results/mkp_stats_allpmyr_weekly_05_24, clear        
reshape wide out*, i(product_module_*) j(year)
gen pd = proper(product_module_d)
replace pd = regexr(pd, "&", "\&")

sort pd
file open tableA17 using "${table_dir}/TableA17.tex", write replace
file write tableA17 "\\\toprule"_n 
file write tableA17 " \multicolumn{1}{l}{Product Market} & \multicolumn{4}{c}{2006} & \multicolumn{4}{c}{2018}   \tabularnewline\cmidrule(lr){2-5}\cmidrule(lr){6-9} & \multicolumn{2}{c}{Median} & \multicolumn{2}{c}{Mean}    & \multicolumn{2}{c}{Median} & \multicolumn{2}{c}{Mean} \tabularnewline\cmidrule(lr){2-3}\cmidrule(lr){4-5}\cmidrule(lr){6-7}\cmidrule(lr){8-9}"_n 
file write tableA17 " & UW & W & UW & W & UW & W & UW & W  \tabularnewline \midrule"_n 
file write tableA17 "\endfirsthead"_n 
file write tableA17 "\toprule "_n 
file write tableA17 "  \multicolumn{1}{l}{Product Market} & \multicolumn{4}{c}{2006} & \multicolumn{4}{c}{2018}   \tabularnewline\cmidrule(lr){2-5}\cmidrule(lr){6-9} & \multicolumn{2}{c}{Median} & \multicolumn{2}{c}{Mean}      & \multicolumn{2}{c}{Median} & \multicolumn{2}{c}{Mean}  \tabularnewline \cmidrule(lr){2-3}\cmidrule(lr){4-5}\cmidrule(lr){6-7}\cmidrule(lr){8-9}"_n 
file write tableA17 " & UW & W & UW & W & UW & W & UW &   W \tabularnewline  \midrule"_n 
file write tableA17 "\endhead "_n 
file write tableA17 "\midrule \multicolumn{9}{r}{{Continued on next page}} \\ \bottomrule"_n 
file write tableA17 "\endfoot"_n 
file write tableA17 "\endlastfoot"_n 
file close tableA17

* Write markups for all modules
local N = _N
forvalues i = 1(1)`N' {
    local pd = pd[`i']
    local v1 = (round(out_markups_502006[`i'], 0.01))
    local v2 = (round(out_markups_rw_502006[`i'], 0.01))
    local v3 = (round(out_markups_mean2006[`i'], 0.01))
    local v4 = (round(out_markups_rw_mean2006[`i'], 0.01))
    local v5 = (round(out_markups_502018[`i'], 0.01))
    local v6 = (round(out_markups_rw_502018[`i'], 0.01))
    local v7 = (round(out_markups_mean2018[`i'], 0.01))
    local v8 = (round(out_markups_rw_mean2018[`i'], 0.01))    
    
    local fmtv1 : display %4.2f `v1'
    local fmtv2 : display %4.2f `v2'
    local fmtv3 : display %4.2f `v3'
    local fmtv4 : display %4.2f `v4'
    local fmtv5 : display %4.2f `v5'
    local fmtv6 : display %4.2f `v6'
    local fmtv7 : display %4.2f `v7'
    local fmtv8 : display %4.2f `v8'
    
    file open tableA17 using "${table_dir}/TableA17.tex", write append
    file write tableA17 "`pd' & `fmtv1' & `fmtv2' & `fmtv3' & `fmtv4'& `fmtv5' & `fmtv6' &`fmtv7'&`fmtv8'\\ "_n 
    file close tableA17
}

file open tableA17 using "${table_dir}/TableA17.tex", write append
file write tableA17 "\bottomrule"_n 
file close tableA17

* Aggregate markups across all modules by year
foreach yr of numlist `year_min'/`year_max' {    
    di "Aggregating markups for year `yr'"
    clear    
    foreach pm of numlist `module_list' {    
        di "  Module `pm'"
        append using ${data_dir}/Demand_Results/tmp_stats_`pm'_`yr'_05_24
    }
    
    * Remove year if already present
    capture confirm variable year
    if _rc==0 {
        drop year    
    }
    
    * Calculate markup distributions (unweighted)
    summarize markups, d
    gen out_markups_all_25 = `r(p25)'
    gen out_markups_all_50 = `r(p50)'
    gen out_markups_all_75 = `r(p75)'
    gen out_markups_all_mean = `r(mean)'
    
    * Calculate markup distributions (revenue-weighted)
    summarize markups [weight = revenue_pooled], d
    gen out_markups_all_rw_25 = `r(p25)'
    gen out_markups_all_rw_50 = `r(p50)'
    gen out_markups_all_rw_75 = `r(p75)'
    gen out_markups_all_rw_mean = `r(mean)'
    
    * Calculate price distributions (unweighted)
    summarize price_normalized, d
    gen out_price_all_25 = `r(p25)'
    gen out_price_all_50 = `r(p50)'
    gen out_price_all_75 = `r(p75)'
    gen out_price_all_mean = `r(mean)'
    
    * Calculate price distributions (revenue-weighted)
    summarize price_normalized [weight = revenue_pooled], d
    gen out_price_all_rw_25 = `r(p25)'
    gen out_price_all_rw_50 = `r(p50)'
    gen out_price_all_rw_75 = `r(p75)'
    gen out_price_all_rw_mean = `r(mean)'
    
    * Calculate cost distributions (unweighted)
    summarize costs, d
    gen out_costs_all_25 = `r(p25)'
    gen out_costs_all_50 = `r(p50)'
    gen out_costs_all_75 = `r(p75)'
    gen out_costs_all_mean = `r(mean)'
    
    * Calculate cost distributions (revenue-weighted)
    summarize costs [weight = revenue_pooled], d
    gen out_costs_all_rw_25 = `r(p25)'
    gen out_costs_all_rw_50 = `r(p50)'
    gen out_costs_all_rw_75 = `r(p75)'
    gen out_costs_all_rw_mean = `r(mean)'
    
    * Keep only aggregated statistics
    keep out*
    keep if _n == 1
    gen year = `yr'
    save ${data_dir}/Demand_Results/stats_`yr'_allpm_05_24_1, replace
}

* Set graph settings
graph set window fontface "stSerif"
graph set window fontfacesymbol "stSerif"

* Combine statistics across all years
clear
foreach yr of numlist `year_min'/`year_max' {
    append using ${data_dir}/Demand_Results/stats_`yr'_allpm_05_24_1
}

* Prepare data for graphs
expand 2, gen(dup)
gen weighting = "Unweighted" if dup == 0
replace weighting = "Revenue Weighted" if dup == 1

* Create variables for different weighting schemes
gen out_markups_25     =    out_markups_all_25 if weighting == "Unweighted"
replace out_markups_25 = out_markups_all_rw_25 if weighting == "Revenue Weighted"

gen out_markups_50     =    out_markups_all_50 if weighting == "Unweighted"
replace out_markups_50 = out_markups_all_rw_50 if weighting == "Revenue Weighted"

gen out_markups_75     =    out_markups_all_75 if weighting == "Unweighted"
replace out_markups_75 = out_markups_all_rw_75 if weighting == "Revenue Weighted"

gen out_markups_mean     =    out_markups_all_mean if weighting == "Unweighted"
replace out_markups_mean = out_markups_all_rw_mean if weighting == "Revenue Weighted"

* Create price and cost variables for different weighting schemes
gen out_costs_25     =    out_costs_all_25 if weighting == "Unweighted"
replace out_costs_25 = out_costs_all_rw_25 if weighting == "Revenue Weighted"

gen out_costs_50     =    out_costs_all_50 if weighting == "Unweighted"
replace out_costs_50 = out_costs_all_rw_50 if weighting == "Revenue Weighted"

gen out_costs_75     =    out_costs_all_75 if weighting == "Unweighted"
replace out_costs_75 = out_costs_all_rw_75 if weighting == "Revenue Weighted"

gen out_costs_mean     =    out_costs_all_mean if weighting == "Unweighted"
replace out_costs_mean = out_costs_all_rw_mean if weighting == "Revenue Weighted"

gen out_price_25     =    out_price_all_25 if weighting == "Unweighted"
replace out_price_25 = out_price_all_rw_25 if weighting == "Revenue Weighted"

gen out_price_50     =    out_price_all_50 if weighting == "Unweighted"
replace out_price_50 = out_price_all_rw_50 if weighting == "Revenue Weighted"

gen out_price_75     =    out_price_all_75 if weighting == "Unweighted"
replace out_price_75 = out_price_all_rw_75 if weighting == "Revenue Weighted"

gen out_price_mean     =    out_price_all_mean if weighting == "Unweighted"
replace out_price_mean = out_price_all_rw_mean if weighting == "Revenue Weighted"

set scheme stcolor_alt
sort year

* Figure 1 (Trends in Markups: Pooled)
di "Generating Figure 1"
forvalues i = 1/2 {
    if `i' == 1 local weight_condition "Unweighted"
    if `i' == 2 local weight_condition "Revenue Weighted"   
    graph twoway (line out_markups_25 year if weighting == "`weight_condition'", lcolor("forest_green")) (line out_markups_50 year if weighting == 	"`weight_condition'", lpattern("_") lcolor("dkorange")) (line out_markups_75 year if weighting == "`weight_condition'", lpattern("_.") lcolor("navy"))  (line out_markups_mean year if weighting == "`weight_condition'", lpattern("-") lcolor("maroon") xlabel(2006 "{stSerif:2006}" 2009 "{stSerif:2009}" 2012 "{stSerif:2012}" 2015 "{stSerif:2015}" 2018 "{stSerif:2018}", nogrid) ylabel(0 "{stSerif:0}" .2 "{stSerif:.2}" .4 "{stSerif:.4}" .6 "{stSerif:.6}" .8 "{stSerif:.8}" 1 "{stSerif:1}", nogrid) title("{stSerif:`weight_condition'}", size(*0.90)) legend(region(lstyle(none)) size(*0.90) position(6) ring(3) rows(2) cols(2)  lab(1 "{stSerif:25th Percentile}") lab(2 "{stSerif:50th Percentile}")    lab(3 "{stSerif:75th Percentile}") lab(4 "{stSerif:Mean}"))   xtitle("") subtitle(, bcolor(white)) graphregion(color(white) fcolor(white) icolor(white) ifcolor(white)) bgcolor(white)), name(graph`i', replace)
}
grc1leg graph1 graph2, rows(1) cols(2) graphregion(color(white)) legendfrom(graph1) position(6) note("") graphregion(margin(l=30 r=30)) ycommon
graph export ${table_dir}/Figure1_new.eps, replace

* Appendix Figure 9     
di "Generating Appendix Figure 9"
forvalues i = 1/2 {
    if `i' == 1 local weight_condition "Unweighted"
    if `i' == 2 local weight_condition "Revenue Weighted"
    graph twoway (line out_price_25 year if weighting == "`weight_condition'", lcolor("0 90 181")) (line out_costs_25 year if weighting == "`weight_condition'", lcolor("220 50 32")) (line out_price_50 year if weighting == "`weight_condition'", lpattern("_") lcolor("0 90 181")) (line out_costs_50 year if weighting == "`weight_condition'", lcolor("220 50 32") lpattern("_"))  (line out_price_75 year if weighting == "`weight_condition'", lpattern("_.") lcolor("0 90 181")) (line out_costs_75 year if weighting == "`weight_condition'", lpattern("_.") lcolor("220 50 32")) (line out_price_mean year if weighting == "`weight_condition'", lpattern("-") lcolor("0 90 181"))  (line out_costs_mean year if weighting == "`weight_condition'", lpattern("-") lcolor("220 50 32")  xlabel(2006 "{stSerif:2006}" 2009 "{stSerif:2009}" 2012 "{stSerif:2012}" 2015 "{stSerif:2015}" 2018 "{stSerif:2018}", nogrid)  ylabel(.2 "{stSerif:.2}" .4 "{stSerif:.4}" .6 "{stSerif:.6}" .8 "{stSerif:.8}" 1 "{stSerif:1}" 1.2 "{stSerif:1.2}", nogrid)  legend(region(lstyle(none)) size(*0.90) position(6) ring(3)  lab(1 "{stSerif:Price - 25th Percentile}") lab(2 "{stSerif:Cost - 25th Percentile}")  lab(3 "{stSerif:Price - 50th Percentile}") lab(4 "{stSerif:Cost - 50th Percentile}") lab(5 "{stSerif:Price - 75th Percentile}") lab(6 "{stSerif:Cost - 75th Percentile}") lab(7 "{stSerif:Price - Mean}") lab(8 "{stSerif:Cost - Mean}"))  xtitle("") title("{stSerif:`weight_condition'}", size(*0.90)) subtitle(, bcolor(white)) graphregion(color(white) fcolor(white) icolor(white) ifcolor(white)) bgcolor(white)),  name(graph`i', replace)
}
grc1leg graph1 graph2, rows(1) cols(2) graphregion(color(white)) legendfrom(graph1) position(6) note("") graphregion(margin(l=10 r=10)) ycommon
graph export ${table_dir}/Figure_pricecost.eps, replace

* Figure 2 (Trends in Markups, 4 Product Markets)    
di "Generating Figure 2"
use ${data_dir}/Demand_Results/mkp_stats_allpmyr_weekly_05_24, clear
        
gen pd = proper(product_module_d)
replace pd = regexr(pd, "&", "\&")
keep if product_module_descr == "CEREAL - READY TO EAT" | product_module_descr == "YOGURT-REFRIGERATED" | product_module_descr == "BEER" | product_module_descr == "SOFT DRINKS - LOW CALORIE"

local pd_values `""Beer" "Cereal - Ready To Eat" "Soft Drinks - Low Calorie" "Yogurt-Refrigerated""'

* Create graphs for each of the four product markets
forvalues i = 1/4 {
    local pd_val : word `i' of `pd_values'
    
    graph set window fontface "stSerif"
    graph set window fontfacesymbol "stSerif"
    graph twoway (line out_markups_25 year if pd == "`pd_val'", lcolor("forest_green")) (line out_markups_50 year if pd == "`pd_val'", lpattern("_") lcolor("dkorange"))     (line out_markups_75 year if pd == "`pd_val'", lpattern("_.") lcolor("navy")) (line out_markups_mean year if pd == "`pd_val'", lpattern("-") lcolor("maroon") xlabel(2006 "{stSerif:2006}" 2009 "{stSerif:2009}" 2012 "{stSerif:2012}" 2015 "{stSerif:2015}" 2018 "{stSerif:2018}", nogrid labsize(small)) ylabel(0 "{stSerif:0}" .2 "{stSerif:.2}" .4 "{stSerif:.4}" .6 "{stSerif:.6}" .8 "{stSerif:.8}" 1 "{stSerif:1}", nogrid labsize(small)) xtitle("") title("{stSerif:`pd_val'}", size(medsmall)) subtitle(, bcolor(white)) graphregion(color(white) fcolor(white) icolor(white) ifcolor(white)) bgcolor(white) aspectratio(.8) legend(rows(2) cols(2) order(1 "{stSerif:25th Percentile}" 2 "{stSerif:50th Percentile}"  3 "{stSerif:75th Percentile}" 4 "{stSerif:Mean}") size(*0.75) symysize(2))),  name(graph`i', replace)
}

* Combine the four graphs
graph set window fontface "stSerif"
graph set window fontfacesymbol "stSerif"
grc1leg graph1 graph2 graph3 graph4, rows(2) cols(2) graphregion(color(white)) legendfrom(graph1) position(6) note("") b2title("") l2title("") commonscheme imargin(0 0 0 0) graphregion(margin(l=30 r=30)) ycommon
graph export ${table_dir}/Figure2.eps, replace

* Figure 4 - Weighted markup calculations
di "Generating Figure 4"
foreach pm of numlist `module_list' {    
    foreach yr of numlist `year_min'/`year_max' {        
        di "Processing module `pm', year `yr' for Figure 4"
        use ${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24.dta, clear
        gen revenue = quantity_normalized * price_normalized
        gen markups = (price_normalized - costs_nl)/(price_normalized)
        egen upc_rev_`yr' = sum(revenue), by(upc upc_ver)
        egen upc_markup_`yr' = sum(markups), by(upc upc_ver)
        egen upc_count_`yr' = count(markups), by(upc upc_ver)
        duplicates drop upc upc_ver, force
        keep product_module_code year upc upc_ver upc_rev_`yr' upc_markup_`yr' upc_count_`yr'
        save ${data_dir}/Demand_Results/tmp_stats_`pm'_`yr'_05_24_weightedmkps, replace
    }
}

* Combine weighted markup data across years and modules
clear
foreach yr of numlist `year_min'/`year_max' {    
    foreach pm of numlist `module_list' {    
        di "Appending module `pm', year `yr' for Figure 4"
        append using ${data_dir}/Demand_Results/tmp_stats_`pm'_`yr'_05_24_weightedmkps
    }
}

* Calculate UPC revenue weights from 2006 and 2018
egen m_upc_rev_2006 = mean(upc_rev_2006), by(upc upc_ver)
replace m_upc_rev_2006 = 0 if m_upc_rev_2006 == .
egen m_upc_rev_2018 = mean(upc_rev_2018), by(upc upc_ver)
replace m_upc_rev_2018 = 0 if m_upc_rev_2018 == .

* Calculate weighted markups using different weightings
foreach yr of numlist `year_min'/`year_max' {
    * Calculate contemporary-weighted markups
    egen num_`yr' = sum(upc_markup_`yr'*upc_rev_`yr') 
    egen denom_`yr' = sum(upc_rev_`yr'*upc_count_`yr')
    gen frac_`yr' = num_`yr'/denom_`yr'    
    
    * Calculate 2006 fixed weights
    egen num_`yr'_2006 = sum(upc_markup_`yr'*m_upc_rev_2006) 
    egen denom_`yr'_2006 = sum(m_upc_rev_2006*upc_count_`yr')
    gen wfrac_`yr' = num_`yr'_2006/denom_`yr'_2006
    
    * Calculate 2018 fixed weights
    egen num_`yr'_2018 = sum(upc_markup_`yr'*m_upc_rev_2018) 
    egen denom_`yr'_2018 = sum(m_upc_rev_2018*upc_count_`yr')
    gen xfrac_`yr' = num_`yr'_2018/denom_`yr'_2018        
}

* Prepare data for graphing
keep frac_* wfrac_* xfrac_* 
duplicates drop
gen i = 1
reshape long frac_ wfrac_ xfrac_, i(i) j(year)

* Create Figure 4
graph twoway (line frac year, sort lcolor("forest_green")  xlabel(2006 "{stSerif:2006}" 2009 "{stSerif:2009}" 2012 "{stSerif:2012}" 2015 "{stSerif:2015}" 2018 "{stSerif:2018}", nogrid labsize(medsmall))  ylabel(.3 "{stSerif:.30}" .35 "{stSerif:.35}" .4 "{stSerif:.40}" .45 "{stSerif:.45}", nogrid labsize(medsmall))  legend(region(lstyle(none)) position(6) ring(3) lab(1 "{stSerif:Floating Weight}") lab(2 "{stSerif:Fixed 2006 Weight}") lab(3 "{stSerif:Fixed 2018 Weight}"))  xtitle("{stSerif:Year}", size(medsmall)) ytitle("{stSerif:Markup}", size(medsmall)) subtitle(, bcolor(white)))  (line wfrac year, lpattern("_") lcolor("dkorange"))  (line xfrac year, lpattern("-") lcolor("navy") aspect(.7) legend(rows(2) cols(2))) 
graph export ${table_dir}/Figure4.eps, replace
graph export ${table_dir}/Figure4.png, replace


/***************************************************************************
* 6. COMPUTE COUNTERFACTUALS
* - Performs counterfactual analyses to decompose markup changes
* - Examines effects of demand parameters, costs, and ownership changes
***************************************************************************/

foreach pm of numlist `module_list' {    
    di "Computing counterfactuals for module `pm'"
    use "${data_dir}/Demand_Results/NL_`pm'_2006_05_24", clear
    append using "${data_dir}/Demand_Results/NL_`pm'_2018_05_24"

    * Handle duplicates
    duplicates tag upc upc_ver week store_code_uc, gen(dup)
    egen sum_dup = sum(dup)
    
    if sum_dup[1] != 0 {
        * Average duplicates
        egen t_p_n = wtmean(price_normalized), by(upc upc_ver week store_code_uc) weight(quantity_normalized)
        egen t_s_n = sum(shares), by(upc upc_ver week store_code_uc) 
        egen t_q_n = sum(quantity_normalized), by(upc upc_ver week store_code_uc)
        replace price_normalized = t_p_n
        replace shares = t_s_n
        replace quantity_normalized = t_q_n 
        duplicates drop upc upc_ver week store_code_uc, force
    }
                
    * Extract 2006 demand parameters
    gen ta = alpha_nl if year == 2006
    gen ts = sigma_nl if year == 2006    
    egen alpha_2006 = mean(ta)
    egen sigma_2006 = mean(ts)
    drop ta ts

    * Create panel identifiers
    egen u = group(upc upc_ver store_code)
    egen g = group(week_end)
    replace g = g - 52 if year == 2018
    
    * Create counterfactual 2006 costs for 2018 products using fixed effects
    quietly xtset u    
    xtreg costs_nl i.g if year == 2006, fe
    predict tmp_mc_2006_weekfe if year == 2006, xb
    egen mc_2006_weekfe = mean(tmp_mc_2006_weekfe), by(g)
    xtreg costs_nl i.g if year == 2018, fe
    predict tmp_mc_2018 if year == 2018, ue
    gen costs_2006 = tmp_mc_2018 + mc_2006_weekfe if year == 2018
    
    * Keep only 2018 data for counterfactuals
    keep if year == 2018

    * Create market identifiers
    egen market_ids = group(store_code week_end)
        
    * Add parent company information
    merge m:1 product_module_code brand_descr using "${data_dir}/supplemental/brand_lookup.dta", gen(_mergeparent)
    drop if _mergeparent == 2
    replace parent_2006 = -brand_code if _mergeparent == 1
    replace parent_2018 = -brand_code if _mergeparent == 1
    egen firm_ids_2006 = group(parent_2006)
    egen firm_ids_2018 = group(parent_2018)
    
    * Rename variables for pyBLP input
    rename price_normalized prices
    rename shares shares
    rename alpha_nl alpha_2018
    rename sigma_nl sigma_2018
    rename delta_nl delta
    rename cluster_ids nesting_ids
    rename costs_nl costs_2018
    format upc %16.0g
    
    * Keep necessary variables for counterfactuals
    keep store_code week_end upc upc_ver market_ids firm_ids_2006 firm_ids_2018 costs_2006 costs_2018 nesting_ids prices shares sigma_2006 alpha_2006 sigma_2018 alpha_2018 delta

    * Export data for Python counterfactual analysis
    outsheet using "${data_dir}/Demand_Results/counter_`pm'.csv", comma replace
    
    * Run Python counterfactual script
    !python3 ${data_dir}/supplemental/counter.py `pm' "${data_dir}/Demand_Results/counter_"
    
    clear 
    insheet using "${data_dir}/Demand_Results/counter_`pm'.csv"
    gen product_module_code = `pm'
    save "${data_dir}/Demand_Results/counter_`pm'", replace
    !rm "${data_dir}/Demand_Results/counter_`pm'.csv"
}


/***************************************************************************
* 6.1 COMBINE COUNTERFACTUAL RESULTS WITH ORIGINAL DATA
* - Merges counterfactual prices with observed prices
* - Calculates markups under different scenarios
***************************************************************************/

* Prepare data for markup comparison
foreach pm of numlist `module_list' {    
    di "Preparing markup comparison for module `pm'"
    
    * Get 2006 markups
    use "${data_dir}/Demand_Results/NL_`pm'_2006_05_24", clear
    egen t_p_n = wtmean(price_normalized), by(upc upc_ver week store_code_uc) weight(quantity_normalized)
    replace price_normalized = t_p_n
    gen markups_2006 = (price_normalized-costs_nl)/price_normalized
    keep upc upc_ver week store_code_uc markups_2006
    duplicates drop upc upc_ver week store_code_uc, force
    
    * Merge with counterfactual results
    append using "${data_dir}/Demand_Results/counter_`pm'", generate(newv) 
    
    * Flag products that appear in both years
    egen double bothyrs = mean(newv), by(upc upc_ver)
    gen in_both = bothyrs > 0 & bothyrs < 1
    
    * Calculate markups under different scenarios
    gen markups = (prices - costs_2018)/prices
    gen markups_demand = (prices_demand - costs_2018)/prices_demand
    gen markups_cost = (prices_cost - costs_2006)/prices_cost
    gen markups_ownership = (prices_ownership - costs_2018)/prices_ownership
    
    * Keep relevant variables
    keep upc upc_ver week store_code_uc markups* in_both
    gen product_module_code = `pm'
    save "${data_dir}/Demand_Results/markups_`pm'", replace
}

* Combine markup data across all modules
clear
foreach pm of numlist `module_list' {    
    di "Appending markups for module `pm'"
    append using "${data_dir}/Demand_Results/markups_`pm'"
}

* Create percentiles of the markup distribution for each scenario
* Full sample
di "Creating percentiles for full sample"
pctile ecd = markups, n(1000) genp(ecd1)
pctile ecd_demand = markups_demand, n(1000) genp(ecd1_demand)
pctile ecd_cost = markups_cost, n(1000) genp(ecd1_cost)
pctile ecd_ownership = markups_ownership, n(1000) genp(ecd1_ownership)
replace ecd1 = ecd1/100
replace ecd1_demand = ecd1_demand/100
replace ecd1_cost = ecd1_cost/100
replace ecd1_ownership = ecd1_ownership/100

pctile ecd_2006 = markups_2006, n(1000) genp(ecd1_2006)
replace ecd1_2006 = ecd1_2006/100
    
* Products in both years
di "Creating percentiles for products in both years"
pctile ecdA = markups if in_both == 1, n(1000) genp(ecd1A)
pctile ecd_demandA = markups_demand if in_both == 1, n(1000) genp(ecd1_demandA)
pctile ecd_costA = markups_cost if in_both == 1, n(1000) genp(ecd1_costA)
pctile ecd_ownershipA = markups_ownership if in_both == 1, n(1000) genp(ecd1_ownershipA)
pctile ecd_2006A = markups_2006 if in_both == 1, n(1000) genp(ecd1_2006A)
replace ecd1A = ecd1A/100
replace ecd1_demandA = ecd1_demandA/100
replace ecd1_costA = ecd1_costA/100
replace ecd1_ownershipA = ecd1_ownershipA/100
replace ecd1_2006A = ecd1_2006A/100    
    
* Keep only necessary observations
keep if _n < 1000    
save "${table_dir}/data_fig3", replace

/***************************************************************************
* 6.2 GENERATE FIGURES FROM COUNTERFACTUAL ANALYSIS
* - Creates Figure 3 (counterfactual CDFs - balanced panel)
* - Creates Appendix Figure 8 (counterfactual CDFs - full sample)
***************************************************************************/

use "${table_dir}/data_fig3", clear

* Figure 3: Counterfactual CDFs (products in both years)
di "Generating Figure 3"
graph twoway (line ecd1A ecdA if ecdA >= 0 & ecdA <= 1, sort lcolor("forest_green") xlabel(0 "{stSerif:0}" .2 "{stSerif:.2}" .4 "{stSerif:.4}" .6 "{stSerif:.6}" .8 "{stSerif:.8}" 1 "{stSerif:1}", nogrid labsize(medsmall))  ylabel(0 "{stSerif:0}" .2 "{stSerif:.2}" .4 "{stSerif:.4}" .6 "{stSerif:.6}" .8 "{stSerif:.8}" 1 "{stSerif:1}", nogrid labsize(medsmall))  legend(region(lstyle(none)) position(6) ring(3) lab(1 "{stSerif:2018}") lab(2 "{stSerif:2018, but 2006 Preferences}") lab(4 "{stSerif:2018, but 2006 Cost}") lab(3 "{stSerif:2006}"))  xtitle("{stSerif:Markup}", size(medsmall)) ytitle("{stSerif:CDF}", size(medsmall)) subtitle(, bcolor(white))) (line ecd1_demandA ecd_demandA if ecd_demandA >= 0 & ecd_demandA <= 1, lpattern("_") lcolor("dkorange"))  (line ecd1_2006A ecd_2006A if ecd_2006A >= 0 & ecd_2006A <= 1, lpattern("-") lcolor("maroon") aspect(.8) legend(rows(2) cols(2)))  (line ecd1_costA ecd_costA if ecd_costA >= 0 & ecd_costA <= 1, lpattern("_.") lcolor("navy")) 
graph export "${table_dir}/Figure3_inboth.eps", replace    

* Appendix Figure 8: Counterfactual CDFs (full sample)    
di "Generating Figure 8"
graph twoway (line ecd1 ecd if ecd >= 0 & ecd <= 1, sort lcolor("forest_green")  xlabel(0 "{stSerif:0}" .2 "{stSerif:.2}" .4 "{stSerif:.4}" .6 "{stSerif:.6}" .8 "{stSerif:.8}" 1 "{stSerif:1}", nogrid labsize(medsmall))  ylabel(0 "{stSerif:0}" .2 "{stSerif:.2}" .4 "{stSerif:.4}" .6 "{stSerif:.6}" .8 "{stSerif:.8}" 1 "{stSerif:1}", nogrid labsize(medsmall)) legend(region(lstyle(none)) position(6) ring(3) lab(1 "{stSerif:2018}") lab(2 "{stSerif:2018, but 2006 Preferences}") lab(4 "{stSerif:2018, but 2006 Cost}") lab(3 "{stSerif:2006}")) xtitle("{stSerif:Markup}", size(medsmall)) ytitle("{stSerif:CDF}", size(medsmall)) subtitle(, bcolor(white))) (line ecd1_demand ecd_demand if ecd_demand >= 0 & ecd_demand <= 1, lpattern("_") lcolor("dkorange"))  (line ecd1_2006 ecd_2006 if ecd_2006 >= 0 & ecd_2006 <= 1, lpattern("-") lcolor("maroon") aspect(.8) legend(rows(2) cols(2))) (line ecd1_cost ecd_cost if ecd_cost >= 0 & ecd_cost <= 1, lpattern("_.") lcolor("navy")) 
graph export "${table_dir}/Figure8.eps", replace        


/***************************************************************************
* 7. COMPUTE CONSUMER SURPLUS
* - Calculates consumer surplus under nested logit model
* - Examines changes in consumer surplus over time
***************************************************************************/

foreach pm of numlist `module_list' {    
    foreach yr of numlist `year_min'/`year_max' {    
        di "Computing consumer surplus for module `pm', year `yr'"
        use ${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24, clear
        
        * Create indicators for first observation in nest and market
        bys store_code date cluster_ids: gen c_ind = _n == 1
        bys store_code date: gen m_ind = _n == 1

        * Calculate nested logit consumer surplus
        * First calculate nest-level inclusive values
        egen temp_cs_1 = sum(exp(delta_nl/(1-sigma_nl))), by(store_code date cluster_ids)
        
        * Then calculate market-level inclusive values
        egen temp_cs_2 = sum(exp((1-sigma_nl)*log(temp_cs_1))) if c_ind == 1, by(store_code date)
        
        * Calculate consumer surplus
        egen cs = mean(log(1 + temp_cs_2)/(-alpha_nl)*market_size), by(store_code date)
        
        * Keep only market-level observations
        keep if m_ind == 1
        save ${data_dir}/Demand_Results/cs_NL_`pm'_`yr'_05_24_allmkts, replace
    }
}

* Aggregate consumer surplus across modules by year
foreach yr of numlist `year_min'/`year_max' {    
    di "Aggregating consumer surplus for year `yr'"
    clear
    foreach pm of numlist `module_list' {    
        append using ${data_dir}/Demand_Results/cs_NL_`pm'_`yr'_05_24_allmkts
    }

    * Sum consumer surplus by product module
    collapse (sum) sum_t_cs = cs, by(product_module_c product_module_d year)
    save ${data_dir}/Demand_Results/cs_NL_allpm_`yr'_05_24_stats_totalcs, replace
}    
    
* Combine consumer surplus data across years
clear
foreach yr of numlist `year_min'/`year_max' {    
    append using ${data_dir}/Demand_Results/cs_NL_allpm_`yr'_05_24_stats_totalcs
}
drop if year == .

* Calculate ratio of consumer surplus relative to 2006
gen tmp = sum_t_cs if year == 2006
egen sum_t_cs06 = mean(tmp), by(product_module_code)
gen ratio = sum_t_cs/sum_t_cs06
keep product_module_descr year ratio
reshape wide ratio, i(product_module_descr) j(year)
    
* Table 7 (Total Consumer Surplus Annual Distributions)
di "Generating Table 7"
file open table7 using "${table_dir}/Table7_cs.tex", write replace
file write table7 "\\\toprule "_n 
file write table7 " Percentile & 2007 & 2008 & 2009 & 2010 & 2011 & 2012 & 2013 & 2014 & 2015 & 2016 & 2017 & 2018 \tabularnewline  \midrule"_n 
    
* Calculate and format percentiles for each year
foreach yr of numlist 2007/`year_max' {
    local i = `yr' - 2006
    summarize ratio`yr', d
    local v`i' = (round(r(p25), 0.01))
    local w`i' = (round(r(p50), 0.01))
    local x`i' = (round(r(p75), 0.01))
    local y`i' = (round(r(mean), 0.01))
}
    
* Format values for output
forvalues i = 1/12 {
    local fmtv`i' : display %4.2f `v`i''
    local fmtw`i' : display %4.2f `w`i''
    local fmtx`i' : display %4.2f `x`i''
    local fmty`i' : display %4.2f `y`i''
}
    
* Write values to table
file write table7 " 25th & `fmtv1' & `fmtv2' & `fmtv3' & `fmtv4' & `fmtv5' & `fmtv6' & `fmtv7' & `fmtv8' & `fmtv9' & `fmtv10' & `fmtv11' & `fmtv12' \\ "_n 
file write table7 " 50th & `fmtw1' & `fmtw2' & `fmtw3' & `fmtw4' & `fmtw5' & `fmtw6' & `fmtw7' & `fmtw8' & `fmtw9' & `fmtw10' & `fmtw11' & `fmtw12' \\ "_n 
file write table7 " 75th & `fmtx1' & `fmtx2' & `fmtx3' & `fmtx4' & `fmtx5' & `fmtx6' & `fmtx7' & `fmtx8' & `fmtx9' & `fmtx10' & `fmtx11' & `fmtx12' \\\midrule "_n 
file write table7 " Mean & `fmty1' & `fmty2' & `fmty3' & `fmty4' & `fmty5' & `fmty6' & `fmty7' & `fmty8' & `fmty9' & `fmty10' & `fmty11' & `fmty12' \\\bottomrule "_n 
file close table7
    
* Table A18 (Total Consumer Surplus: 70 Product Markets)
di "Generating Table A18"
gen pd = proper(product_module_d)
replace pd = regexr(pd, "&", "\&")
sort pd 
file open tableA18 using "${table_dir}/TableA18_cs.tex", write replace
file write tableA18 "\\\toprule "_n 
file write tableA18 " Product Market & $\,\,\,\,\,\,$ 2009 $\,\,\,\,\,\,  $\,\,\,\,\,\,$ 2012$\,\,\,\,\,\, $\,\,\,\,\,\,$ 2015 $\,\,\,\,\,\,$ & $\,\,\,\,\,\,$ 2018 $\,\,\,\,\,\,$ \tabularnewline  \midrule"_n 
file write tableA18 "\endfirsthead"_n 
file write tableA18 "\\\toprule "_n 
file write tableA18 " Product Market & $\,\,\,\,\,\,$ 2009 $\,\,\,\,\,\,  $\,\,\,\,\,\,$ 2012$\,\,\,\,\,\, $\,\,\,\,\,\,$ 2015 $\,\,\,\,\,\,$ & $\,\,\,\,\,\,$ 2018 $\,\,\,\,\,\,$ \tabularnewline  \midrule"_n 
file write tableA18 "\endhead "_n 
file write tableA18 "\midrule \multicolumn{5}{r}{{Continued on next page}} \\ \bottomrule"_n 
file write tableA18 "\endfoot"_n 
file write tableA18 "\endlastfoot"_n 
file close tableA18

* Write consumer surplus ratios by product market
local N = _N
forvalues i = 1(1)`N' {
    local pd = pd[`i']
    local v1 = (round(ratio2009[`i'], 0.01))
    local v2 = (round(ratio2012[`i'], 0.01))
    local v3 = (round(ratio2015[`i'], 0.01))
    local v4 = (round(ratio2018[`i'], 0.01))
    
    local fmtv1 : display %4.2f `v1'
    local fmtv2 : display %4.2f `v2'
    local fmtv3 : display %4.2f `v3'
    local fmtv4 : display %4.2f `v4'
    
    file open tableA18 using "${table_dir}/TableA18_cs.tex", write append
    file write tableA18 "`pd' & `fmtv1' & `fmtv2' & `fmtv3' & `fmtv4' \\ "_n 
    file close tableA18
}
file open tableA18 using "${table_dir}/TableA18_cs.tex", write append
file write tableA18 "\bottomrule"_n 
file close tableA18        


/***************************************************************************
* 8. ADDITIONAL ANALYSES
* - Alternative clustering approaches
* - Shrinkflation analysis
* - Ownership concentration analysis
***************************************************************************/

/***************************************************************************
* 8.1 ALTERNATIVE CLUSTERING APPROACHES
* - Compares different clustering methods
* - Tests robustness of product classification
***************************************************************************/

*Run the clustering analysis on all 75 modules
local module_list "1030 1042 1044 1050 1105 1114 1120 1177 1257 1290 1293 1303 1309 1320 1323 1326 1327 1330 1331 1334 1340 1341 1344 1360 1362 1425 1441 1452 1455 1461 1463 1484 1487 1493 1498 1508 1532 1553 2621 2623 2624 2629 2631 2634 2672 2675 3550 3561 3572 3574 3576 3577 3578 3583 3590 3592 3603 3605 3618 3625 4000 4001 4002 4003 4004 4005 4006 5000 5010 5050 7012 7260 7340 7460 7734"
        
foreach pm of numlist `module_list' {
    di "Analyzing clustering for module `pm'"
    
    * Load clustering results
    use ${data_dir}/Nielsen/panel/clustering_results_new_`pm', clear
    
    * Merge with product information
    merge m:1 product_module_code upc upc_ver using ${data_dir}/Nielsen/movement/products/products_`pm'
    keep if _merge == 3            
    drop _merge     
    duplicates drop upc upc_ver, force     
    
    preserve
    * Clean brand name from UPC description
    set more off
    gen brand = brand_descr
    gen prod = strtrim(upc_descr)
    egen upc_group = group(upc upc_ver) 
    duplicates drop brand_descr prod upc_group, force
    sort brand prod upc
    gen counter = 1
    gen matchwhole = regexm(prod, brand)
    replace prod = regexreplaceall(prod, "&", " ")
    
    * Split product description
    split prod, gen(abbrev_)
    reshape long abbrev_, i(brand prod upc_group) 
    drop if abbrev_ == ""

    * Count word occurrences
    egen totalcount = sum(counter), by(abbrev)
    egen totalcountinbrand = sum(counter), by(abbrev brand)
    egen gr = group(abbrev brand)
    egen mingr = min(gr), by(abbrev)
    egen maxgr = max(gr), by(abbrev)
    gen numbrands = maxgr-mingr+1
    gen brandwords = wordcount(brand)

    * Process brand terms
    order brand prod upc_group abbrev_ brandwords matchwhole _j numbrands totalcount totalcountinbrand
    drop if _j <= brandwords & matchwhole == 1  

    * Calculate brand-related statistics
    gen totalcount1 = totalcount if _j == 1
    gen totalcountinbrand1 = totalcountinbrand if _j == 1
    gen numbrands1 = numbrands if _j == 1
    egen mtotalcount1 = mean(totalcount1), by(brand prod upc)
    egen mtotalcountinbrand1 = mean(totalcountinbrand1), by(brand prod upc)
    egen mtnumbrands1 = mean(numbrands1), by(brand prod upc)
    drop if _j == 1 

    * Create regular expression for brand lookup
    gen regexlookup = ""
    forvalues i = 1(1)9 {
        gen char_`i' = substr(abbrev, `i', 1)
        replace regexlookup = regexlookup + char_`i' + "(.+)*" if char_`i' != "-" & char_`i' != ""
    }
    drop char*

    * Calculate brand ratio
    gen ratio = numbrands/mtnumbrands1
    
    * Process multi-word brands
    gen keepon = 2
    levelsof brandwords, local(levels) 
    foreach i of local levels {  
        if `i' > 1 {  
            gen match`i'a = regexm(brand, regexlookup) if matchwhole == 0 & _j == `i' & `i' <= brandwords & keepon == `i' 
            gen match`i'b = 1 if matchwhole == 0 & totalcountinbrand >= mtotalcountinbrand1 & _j == `i' & `i' <= brandwords & keepon == `i' 
            gen match`i'c = 1 if matchwhole == 0 & ratio <= 10 & _j == `i' & `i' <= brandwords & keepon == `i' 
            gen match`i' = match`i'a + match`i'b + match`i'c
            drop if _j == `i' & matchwhole == 0 & match`i' == 3  
            drop keepon
            egen keepon = min(_j) if matchwhole == 0, by(brand prod)  
        }
    }
    
    * Create cleaned product description
    keep brand prod upc upc_v upc_group abbrev _j
    reshape wide abbrev, i(brand prod upc upc_v upc_group) j(_j)
    gen cleanedprod = ""
    foreach var of varlist abbrev_* {
        replace cleanedprod = cleanedprod + " " + `var'
    }
    replace cleanedprod = strtrim(cleanedprod)
    keep upc upc_ver cleanedprod
    duplicates drop 
    save ${data_dir}/Nielsen/panel/cleanedupc_d, replace
    
    restore 
    merge m:1 upc upc_ver using ${data_dir}/Nielsen/panel/cleanedupc_d

    * Calculate cluster-level statistics
    egen cluster_count = count(upc), by(cluster_ids)
    egen total_count = count(upc)

    * Calculate variance ratios for continuous variables
    foreach x of varlist size1_a multi {
        egen var_x = var(`x')
        gen T_`x' = _N^2*var_x
        egen cvar_x = var(`x'), by(cluster_ids)
        gen WC_x = cluster_count^2*cvar_x
        egen W_`x' = sum(WC_x/cluster_count)
        gen ratio_`x' = W_`x'/(T_`x')
        drop WC_x var_x cvar_x
    }

    * Calculate variance ratios for categorical variables
    foreach x of varlist brand_descr {
        egen x_cluster_count = count(upc), by(`x' cluster_ids)
        egen x_total_count = count(upc), by(`x')
        egen T_`x' = sum(0.5*total_count*x_total_count*(1-x_total_count/total_count)/x_total_count)
        egen WC = sum(0.5*cluster_count*x_cluster_count*(1-x_cluster_count/cluster_count)/x_cluster_count), by(cluster_ids)
        egen W_`x' = sum(WC/cluster_count)
        gen ratio_`x' = W_`x'/(T_`x')
        drop WC x_cluster_count x_total_count
    }

    * Process text features
    split cleanedprod, gen(abbrev_)
    gen tmp = 0
    gen tmp_c = 0
    
    forvalues i = 1/`=_N' {
        gen cid = cluster_ids == cluster_ids[`i']
        foreach x of varlist abbrev* {
            if `x'[`i'] != "" {
                gen ttmp = 1-regexm(cleanedprod, " " + `x'[`i'] + " ")
                egen ttmp_all = sum(ttmp)
                egen ttmp_c = sum(ttmp*cid)
                replace tmp = tmp[`i'] + ttmp_all[`i'] in `i'
                replace tmp_c = tmp_c[`i'] + ttmp_c[`i'] in `i'
                drop ttmp ttmp_all ttmp_c
            }
        }
        drop cid
    }

    * Calculate UPC description ratio
    egen T = sum(0.5*tmp)
    egen W = sum(0.5*tmp_c)
    gen ratio_upcdescr = W/T

    * Save clustering statistics
    keep product_module_* ratio*
    duplicates drop
    save ${data_dir}/Nielsen/panel/clustering_`pm'_reason, replace
}

* Combine clustering statistics across modules
clear
foreach pm of numlist `module_list' {
    append using ${data_dir}/Nielsen/panel/clustering_`pm'_reason
}

* Merge with product information
merge 1:m product_module_code using ${data_dir}/Nielsen/movement/products
keep if _merge == 3
duplicates drop product_module_code, force

* Create Appendix Table 19 (Clustering Reasons)
di "Generating Table 19 (Clustering Reasons)"
file open table19 using "${table_dir}/TableClusterReasons.tex", write replace
file write table19 "\toprule"_n
file write table19 "Product Module & Unit Size &  Multi & Brand & UPC Description \tabularnewline  \midrule"_n
file write table19 "\endhead "_n
file write table19 "\midrule \multicolumn{6}{r}{{Continued on next page}} \\ \bottomrule"_n
file write table19 "\endfoot"_n
file write table19 "\endlastfoot"_n
file close table19

file open table19 using "${table_dir}/TableClusterReasons.tex", write append
file write table19 "\hline"_n
file close table19

* Format output and write to table
gen pd = proper(product_module_d)
replace pd = regexr(pd, "&", "\&")
sort pd
local N = _N
forvalues i = 1(1)`N' {
    local v1 = pd[`i'] 
    local v2 = (round(ratio_size[`i'], 0.01))
    local v3 = (round(ratio_mul[`i'], 0.01))
    local v4 = (round(ratio_brand[`i'], 0.01))
    local v5 = (round(ratio_upc[`i'], 0.01))
    
    local fmtv2 : display %16.2g `v2'
    local fmtv3 : display %16.2g `v3'
    local fmtv4 : display %16.2g `v4'
    local fmtv5 : display %16.2g `v5'
    
    file open table19 using "${table_dir}/TableClusterReasons.tex", write append
    file write table19 "`v1' & `fmtv2' & `fmtv3' & `fmtv4' & `fmtv5' \\ "_n 
    file close table19
}

file open table19 using "${table_dir}/TableClusterReasons.tex", write append
file write table19 "\bottomrule"_n
file close table19

* Prepare data for clustering with single-person households
use ${data_dir}/Nielsen/panel/for_correlation_allyr, clear
merge m:1 household_code using ${data_dir}/Nielsen/panel/single_hh_allyr
keep if _merge == 3 
drop _merge
save ${data_dir}/Nielsen/panel/for_correlation_all_new_singleHH, replace

* Run clustering on single-person household data
clear
foreach pm of numlist `module_list' {
    di "Clustering with single-person households for module `pm'"
    use ${data_dir}/Nielsen/panel/for_correlation_all_new_singleHH, clear
	 * Get household purchase data for current module
    merge m:1 upc upc_ver using ${data_dir}/Nielsen/movement/upc_`pm'
    keep if _merge == 3
    drop _merge

    egen product = group(upc upc_ver)
	
	save ${data_dir}/Nielsen/panel/singleHH_prodmapping_`pm', replace
	
    keep product household product_module_code
    gen purchase = 1
    reshape wide purchase, i(household_code product_m) j(product)
    
    * Replace missing values with zeros
    foreach var of varlist purchase* {
        quietly replace `var' = 0 if `var' == .
    }
    
    * Calculate correlation-based dissimilarity matrix
    matrix dissimilarity DD = purchase*, correlation dissim(oneminus) var 
    clustermat wardslinkage DD, shape(full) clear name(cluster_results) label(product)
    sort cluster_results_id
    dudahart, dist(DD) ngroups(30) id(cluster_results_id)
    
    * Find optimal number of clusters
    local i = 1
    local j = `i' + 1
    while `r(dudat2_`i')' > `r(dudat2_`j')' {
        local i = `i' + 1
        local j = `j' + 1
    }
    gen n_clusters = `i'
     
	* Generate cluster assignments
    cluster gen cluster_ids = groups(`i')
    
    * Clean up and prepare for merging
    gen temp = substr(product, 9, .)
    destring temp, replace
    keep temp cluster_ids n_clusters
    rename temp product
    
    * Merge back with original data
    merge 1:m product using  ${data_dir}/Nielsen/panel/singleHH_prodmapping_`pm'
    
    * Keep only necessary variables
    keep product_module_c upc upc_ver cluster_ids n_clusters
    order product_module_c upc upc_ver cluster_ids n_clusters
	rename cluster_ids cluster_ids_singleHH   
    duplicates drop
	
	save ${data_dir}/Nielsen/panel/clustering_`pm'_05_17_singleHH, replace
    di "Module `pm' optimal clusters: `=n_clusters'"
}


* Process first half of household panel data for clustering
clear
foreach yr of numlist `year_min_panel'/2010 {
    append using ${data_dir}/Nielsen/panel/for_correlation_`yr'
}
duplicates drop
save ${data_dir}/Nielsen/panel/for_correlation_all_firsthalf, replace

* Run clustering on first half of household data
foreach pm of numlist `module_list' {
    di "Clustering with first half households for module `pm'"
    use ${data_dir}/Nielsen/panel/for_correlation_all_firsthalf, clear
    
	* Get household purchase data for current module
    merge m:1 upc upc_ver using ${data_dir}/Nielsen/movement/upc_`pm'
    keep if _merge == 3
    drop _merge

    egen product = group(upc upc_ver)

	save ${data_dir}/Nielsen/panel/firsthalf_prodmapping_`pm', replace
	
    keep product household product_module_code
    gen purchase = 1
    reshape wide purchase, i(household_code product_m) j(product)
    
	* Replace missing values with zeros
    foreach var of varlist purchase* {
        quietly replace `var' = 0 if `var' == .
    }
    
	* Calculate correlation-based dissimilarity matrix
    matrix dissimilarity DD = purchase*, correlation dissim(oneminus) var 
    clustermat wardslinkage DD, shape(full) clear name(cluster_results) label(product)
    sort cluster_results_id
    dudahart, dist(DD) ngroups(30) id(cluster_results_id)
    
    local i = 1
    local j = `i' + 1
    while `r(dudat2_`i')' > `r(dudat2_`j')' {
        local i = `i' + 1
        local j = `j' + 1
    }
    gen n_clusters = `i'
	
	* Generate cluster assignments
    cluster gen cluster_ids = groups(`i')
    
    * Clean up and prepare for merging
    gen temp = substr(product, 9, .)
    destring temp, replace
    keep temp cluster_ids n_clusters
    rename temp product
    
    * Merge back with original data
    merge 1:m product using  ${data_dir}/Nielsen/panel/firsthalf_prodmapping_`pm'
    
    * Keep only necessary variables
    keep product_module_c upc upc_ver cluster_ids n_clusters
    order product_module_c upc upc_ver cluster_ids n_clusters
	rename cluster_ids cluster_ids_firsthalf   
    duplicates drop
	
    save ${data_dir}/Nielsen/panel/clustering_`pm'_05_17_firsthalf, replace
    di "Module `pm' optimal clusters: `=n_clusters'"
}


* Process second half of household panel data for clustering
clear
foreach yr of numlist 2011/`year_max_panel' {
    append using ${data_dir}/Nielsen/panel/for_correlation_`yr'
}
duplicates drop
save ${data_dir}/Nielsen/panel/for_correlation_all_secondhalf, replace

* Run clustering on second half of household data
clear
foreach pm of numlist `module_list' {
    di "Clustering with second half households for module `pm'"
    use ${data_dir}/Nielsen/panel/for_correlation_all_secondhalf, clear
    * Get household purchase data for current module
    merge m:1 upc upc_ver using ${data_dir}/Nielsen/movement/upc_`pm'
    keep if _merge == 3
    drop _merge

    egen product = group(upc upc_ver)

	save ${data_dir}/Nielsen/panel/secondhalf_prodmapping_`pm', replace
	
    keep product household product_module_code
    gen purchase = 1
    reshape wide purchase, i(household_code product_m) j(product)
    
	* Replace missing values with zeros
    foreach var of varlist purchase* {
        quietly replace `var' = 0 if `var' == .
    }
    
	* Calculate correlation-based dissimilarity matrix
    matrix dissimilarity DD = purchase*, correlation dissim(oneminus) var 
    clustermat wardslinkage DD, shape(full) clear name(cluster_results) label(product)
    sort cluster_results_id
    dudahart, dist(DD) ngroups(30) id(cluster_results_id)
    
    local i = 2
    local j = `i' + 1
    while `r(dudat2_`i')' > `r(dudat2_`j')' {
        local i = `i' + 1
        local j = `j' + 1
    }
    gen n_clusters = `i'
    
	* Generate cluster assignments
    cluster gen cluster_ids = groups(`i')
    
    * Clean up and prepare for merging
    gen temp = substr(product, 9, .)
    destring temp, replace
    keep temp cluster_ids n_clusters
    rename temp product
    
    * Merge back with original data
    merge 1:m product using  ${data_dir}/Nielsen/panel/secondhalf_prodmapping_`pm'
    
    * Keep only necessary variables
    keep product_module_c upc upc_ver cluster_ids n_clusters
    order product_module_c upc upc_ver cluster_ids n_clusters
	rename cluster_ids cluster_ids_secondhalf  
    duplicates drop
	
    save ${data_dir}/Nielsen/panel/clustering_`pm'_05_17_secondhalf, replace
    di "Module `pm' optimal clusters: `=n_clusters'"
}


* Compare clustering results across approaches
* Compare with single household clustering
foreach pm of numlist `module_list' {
    di "Comparing clustering approaches for module `pm'"
    use ${data_dir}/Nielsen/panel/clustering_results_new_`pm', clear
    rename cluster_ids cluster_ids_main
    rename n_clusters n_clusters_main        
    duplicates drop
    
    * Compare with single-person household clustering
    merge 1:1 upc upc_ver using ${data_dir}/Nielsen/panel/clustering_`pm'_05_17_singleHH   
    drop if _merge != 3
    drop _merge    
    egen product = group(upc upc_ver)        

    gen num = 0
    gen denom = 0

    local num = 0
    local denom = 0
    sum product

    local m = `r(max)'

    * Count agreement between main and single household clustering
    forvalues i = 1(1)`m' {
        di "Processing product `i' of `m'"
        local s = `i' + 1
        forvalues j = `s'(1)`m' {
            local denom = `denom' + 1
            if cluster_ids_main[`i'] == cluster_ids_main[`j'] & cluster_ids_singleHH[`i'] == cluster_ids_singleHH[`j'] {
                local num = `num' + 1
            }
            if cluster_ids_main[`i'] != cluster_ids_main[`j'] & cluster_ids_singleHH[`i'] != cluster_ids_singleHH[`j'] {
                local num = `num' + 1 
            }
            replace num = `num'
            replace denom = `denom'
        }
    }
    
    * Calculate agreement fraction and save
    gen frac = num/denom
    egen n_prod_overlap = max(product)
    keep product_module_code n_clusters_main n_clusters num denom frac n_prod_overlap
    duplicates drop
    save ${data_dir}/Nielsen/panel/cluster_comp_main_singleHH_`pm', replace
}        

* Compare with first half clustering
foreach pm of numlist `module_list' {
    di "Comparing main to first half clustering for module `pm'"
    use ${data_dir}/Nielsen/panel/clustering_results_new_`pm', clear
    rename cluster_ids cluster_ids_main
    rename n_clusters n_clusters_main        
    duplicates drop
    
    * Merge with first half clustering
    merge 1:1 upc upc_ver using ${data_dir}/Nielsen/panel/clustering_`pm'_05_17_firsthalf    
    drop if _merge != 3
    drop _merge    
    egen product = group(upc upc_ver)        

    gen num = 0
    gen denom = 0

    local num = 0
    local denom = 0
    sum product

    local m = `r(max)'

    * Count agreement between main and first half clustering
    forvalues i = 1(1)`m' {
        di "Processing product `i' of `m'"
        local s = `i' + 1
        forvalues j = `s'(1)`m' {
            local denom = `denom' + 1
            if cluster_ids_main[`i'] == cluster_ids_main[`j'] & cluster_ids_firsthalf[`i'] == cluster_ids_firsthalf[`j'] {
                local num = `num' + 1
            }
            if cluster_ids_main[`i'] != cluster_ids_main[`j'] & cluster_ids_firsthalf[`i'] != cluster_ids_firsthalf[`j'] {
                local num = `num' + 1 
            }
            replace num = `num'
            replace denom = `denom'
        }
    }
    
    * Calculate agreement fraction and save
    gen frac = num/denom
    egen n_prod_overlap = max(product)
    keep product_module_code n_clusters_main n_clusters num denom frac n_prod_overlap
    duplicates drop
    save ${data_dir}/Nielsen/panel/cluster_comp_main_firsthalf_`pm', replace
}        

* Compare with second half clustering
foreach pm of numlist `module_list' {
    di "Comparing main to second half clustering for module `pm'"
    use ${data_dir}/Nielsen/panel/clustering_results_new_`pm', clear
    rename cluster_ids cluster_ids_main
    rename n_clusters n_clusters_main        
    duplicates drop
    
    * Merge with second half clustering
    merge 1:1 upc upc_ver using ${data_dir}/Nielsen/panel/clustering_`pm'_05_17_secondhalf   
    drop if _merge != 3
    drop _merge    
    egen product = group(upc upc_ver)        

    gen num = 0
    gen denom = 0

    local num = 0
    local denom = 0
    sum product

    local m = `r(max)'

    * Count agreement between main and second half clustering
    forvalues i = 1(1)`m' {
        di "Processing product `i' of `m'"
        local s = `i' + 1
        forvalues j = `s'(1)`m' {
            local denom = `denom' + 1
            if cluster_ids_main[`i'] == cluster_ids_main[`j'] & cluster_ids_secondhalf[`i'] == cluster_ids_secondhalf[`j'] {
                local num = `num' + 1
            }
            if cluster_ids_main[`i'] != cluster_ids_main[`j'] & cluster_ids_secondhalf[`i'] != cluster_ids_secondhalf[`j'] {
                local num = `num' + 1 
            }
            replace num = `num'
            replace denom = `denom'
        }
    }
    
    * Calculate agreement fraction and save
    gen frac = num/denom
    egen n_prod_overlap = max(product)
    keep product_module_code n_clusters_main n_clusters num denom frac n_prod_overlap
    duplicates drop
    save ${data_dir}/Nielsen/panel/cluster_comp_main_secondhalf_`pm', replace
}                

* Combine comparison results
clear
foreach pm of numlist `module_list' {
    append using ${data_dir}/Nielsen/panel/cluster_comp_main_singleHH_`pm'
}
rename n_clusters n_clusters_singleHH
rename frac frac_singleHH

foreach pm of numlist `module_list' {
    append using ${data_dir}/Nielsen/panel/cluster_comp_main_secondhalf_`pm'
}
rename n_clusters n_clusters_secondhalf
rename frac frac_secondhalf

foreach pm of numlist `module_list' {
    append using ${data_dir}/Nielsen/panel/cluster_comp_main_firsthalf_`pm'
}
rename n_clusters n_clusters_firsthalf
rename frac frac_firsthalf

* Calculate averages by module
collapse frac_singleHH frac_firsthalf frac_secondhalf n_clusters_main n_clusters_firsthalf n_clusters_secondhalf n_clusters_singleHH, by(product_module_code)

* Figure 5: Alternative Clustering Histograms
di "Generating Figure 5"
twoway (kdensity frac_firsthalf, lcolor("dkorange")) (kdensity frac_secondhalf, lpattern("--") lcolor("navy")  legend(region(lstyle(none)) position(6) ring(3) lab(1 "Full Household Purchase Data vs. First Half (2004-2010)")  lab(2 "Full Household Purchase Data vs. Second Half (2011-2018)")  lab(3 "Full Household Purchase Data vs. Single Person Households"))  ylabel(0(1)4, nogrid) xlabel(0(.2)1, nogrid)  xtitle("Fraction of Product-Pairs With Coinciding Nest Assignment in Product Module")  subtitle(, bcolor(white)))  (kdensity frac_singleHH, lpattern("_") lcolor("forest_green")) 
graph export ${table_dir}/Figure5_AltClusteringHists.eps, replace

* Appendix Table 20 (Alternative Clustering Overlap)
di "Generating Table 20"
merge 1:m product_module_code using ${data_dir}/Nielsen/movement/products
drop if _merge == 2
keep product_module_code product_module_descr frac_* n_clus*
duplicates drop

file open table20 using "${table_dir}/Table20_AltClusterOverlap.tex", write replace
file write table20 "\\\hline "_n
file write table20 " & \multicolumn{4}{c}{Number of Clusters} & \multicolumn{3}{c}{Fraction Overlap}\\"_n
file write table20 " \cline{2-5} \cline{6-8}"_n    
file write table20 "Product Module & All & First Half & Second Half & Single HH & First Half & Second Half & Single HH \tabularnewline \hline"_n
file write table20 "\endfirsthead"_n
file write table20 "\hline "_n
file write table20 " & \multicolumn{4}{c}{Number of Clusters} & \multicolumn{3}{c}{Fraction Overlap}\\"_n
file write table20 " \cline{2-5} \cline{6-8}"_n    
file write table20 "Product Module & All & First Half & Second Half & Single HH & First Half & Second Half & Single HH \tabularnewline \hline"_n
file write table20 "\endhead "_n
file write table20 "\hline \multicolumn{6}{r}{{Continued on next page}} \\ \hline"_n
file write table20 "\endfoot"_n
file write table20 "\endlastfoot"_n
file write table20 "\hline"_n
file close table20


gen pd = proper(product_module_d)
replace pd = regexr(pd, "&", "\&")

local N = _N
forvalues i = 1(1)`N' {
    local v1 = pd[`i'] 
    local v2 = n_clusters_main[`i']
    local v3 = n_clusters_first[`i']
    local v4 = n_clusters_second[`i']
    local v5 = n_clusters_single[`i']
    local v6 = (round(frac_firsthalf[`i'], 0.01))
    local v7 = (round(frac_secondhalf[`i'], 0.01))
    local v8 = (round(frac_singleHH[`i'], 0.01))
    
    local fmtv6 : display %16.2g `v6'
    local fmtv7 : display %16.2g `v7'
    local fmtv8 : display %16.2g `v8'
    
    file open table20 using "${table_dir}/Table20_AltClusterOverlap.tex", write append
    file write table20 "`v1' & `v2' & `v3' & `v4'& `v5' & `fmtv6' & `fmtv7' & `fmtv8' \\ "_n 
    file close table20
}

file open table20 using "${table_dir}/Table20_AltClusterOverlap.tex", write append
file write table20 "\hline"_n
file close table20


/***************************************************************************
* 8.2 MARKET SIZE ROBUSTNESS ANALYSIS
* - Tests sensitivity of demand estimates to market size definition
***************************************************************************/

local mult_list "1 2"        
        
foreach yr of numlist `year_min'/`year_max' {
    di "Market size robustness for year `yr'"
    use "${data_dir}/Demand_Results/NL_1344_`yr'_05_24", clear
    
    * Calculate baseline diversion ratios
    gen dso_dpj = -alpha_nl * shares * outside_share
    gen e_j0 = -alpha_nl * shares * price_normalized
    
    * Test different market size multipliers
    foreach x of numlist `mult_list' {
        * Create alternative market sizes
        gen market_size_`x' = max_inside_qunat_norm*(`x' + 0.5)
        gen shares_`x' = quantity_norm/market_size_`x'
        egen inside_share_`x' = sum(shares_`x'), by(store_code date)
        gen outside_share_`x' = 1-inside_share_`x'
    
        * Create alternative nested logit variables
        gen ln_sjg_`x' = log(quantity_norm/nest_quantity_norm)
        gen y_var_`x' = log(shares_`x'/outside_share_`x')
    
        * Estimate demand with alternative market size
        ivreghdfe y_var_`x' (ln_sjg_`x' price_normalized = num_prods num_prods_nest z_hausman), absorb(upc_fe dma_brand_quarter)
        
        * Save parameter estimates
        gen alpha_nl_`x' = _b[price_normalized]  
        gen sigma_nl_`x' = _b[ln_sjg] 
        
        * Calculate alternative elasticities and diversion ratios
        gen ope_`x' = _b[price_normalized] * price_normalized * (1/(1-_b[ln_sjg]) - _b[ln_sjg]/(1-_b[ln_sjg])*exp(ln_sjg_`x') - shares_`x') 
        gen dso_dpj_`x' = -_b[price_normalized] * shares_`x' * outside_share_`x'
        gen e_j0_`x' = -_b[price_normalized] * shares_`x' * price_normalized        
    }
    
    save "${data_dir}/Demand_Results/NL_1344_`yr'_05_24_mktsize", replace
}
    
* Aggregate statistics by year
foreach yr of numlist `year_min'/`year_max' {
    di "Aggregating market size stats for `yr'"
    use ${data_dir}/Demand_Results/NL_1344_`yr'_05_24_mktsize.dta, clear
    keep year alpha_nl* sigma_nl* dso_dpj* e_j0*
    
    * Calculate means
    egen mean_dso_dpj = mean(dso_dpj)
    egen mean_dso_dpj_1 = mean(dso_dpj_1)
    egen mean_dso_dpj_2 = mean(dso_dpj_2)
    egen mean_e_j0 = mean(e_j0)
    egen mean_e_j0_1 = mean(e_j0_1)
    egen mean_e_j0_2 = mean(e_j0_2)
    
    drop dso_dpj* e_j0*
    keep if _n == 1
    keep year mean_*
    save "${data_dir}/Demand_Results/NL_1344_`yr'_05_24_mktsize_stats", replace
}

* Combine statistics across years
clear
foreach yr of numlist `year_min'/`year_max' {
    append using "${data_dir}/Demand_Results/NL_1344_`yr'_05_24_mktsize_stats"
}

* Table 8: Market Size Robustness
di "Generating Table 8"
file open table8 using "${table_dir}/Table8.tex", write replace
file write table8 "\\\toprule"_n
file write table8 "Year & \multicolumn{3}{c}{Diversion to Outside Option} & \multicolumn{3}{c}{Own-Price Elasticity} \\"_n
file write table8 " \cline{2-4} \cline{5-7}"_n
file write table8 " & Market Size 1.5 & Market Size 2.0 & Market Size 2.5 & Market Size 1.5 & Market Size 2.0 & Market Size 2.5 \tabularnewline \midrule"_n
file close table8

* Write results to table
sort year
local N = _N
forvalues i = 1(1)`N' {
    local yr = year[`i'] 
    local v1 = (round(mean_dso_dpj_1[`i'], 0.001))
    local v2 = (round(mean_dso_dpj[`i'], 0.001))
    local v3 = (round(mean_dso_dpj_2[`i'], 0.001))
    local v4 = (round(mean_e_j0_1[`i'], 0.001))
    local v5 = (round(mean_e_j0[`i'], 0.001))
    local v6 = (round(mean_e_j0_2[`i'], 0.001))
    
    local fmtv1 : display %16.3g `v1'
    local fmtv2 : display %16.3g `v2'
    local fmtv3 : display %16.3g `v3'
    local fmtv4 : display %16.3g `v4'
    local fmtv5 : display %16.3g `v5'
    local fmtv6 : display %16.3g `v6'
    
    file open table8 using "${table_dir}/Table8.tex", write append
    file write table8 "`yr' & `fmtv1' & `fmtv2' & `fmtv3' & `fmtv4' & `fmtv5' & `fmtv6' \\ "_n 
    file close table8
}

file open table8 using "${table_dir}/Table8.tex", write append
file write table8 "\bottomrule"_n
file close table8


/***************************************************************************
* 8.3 ALTERNATIVE NESTING STRUCTURE ANALYSIS
* - Compares different approaches to product nesting
* - Tests sensitivity of demand and diversion to nesting structure
***************************************************************************/

local pm = 1344
local yr = 2006    

di "Alternative nesting for module `pm', year `yr'"
use "${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24", clear

* Merge with alternative clustering methods
merge m:1 upc upc_ver using ${data_dir}/Nielsen/panel/clustering_`pm'_05_17_singleHH, gen(_mergeSHH)    
drop if _mergeSHH != 3    
drop _mergeSHH

merge m:1 upc upc_ver using ${data_dir}/Nielsen/panel/clustering_`pm'_05_17_firsthalf, gen(_mergeFH)    
drop if _mergeFH != 3
drop _mergeFH

* Create count variables for different nesting structures
egen num_prods_nest_singleHH = count(price_norm), by(date store_c cluster_ids_singleHH)
egen num_prods_nest_firsthalf = count(price_norm), by(date store_c cluster_ids_firsthalf)
        
* Generate shares for alternative nesting structures
egen nest_quantity_norm_singleHH = sum(quantity_normalized), by(store_code date cluster_ids_singleHH)
gen ln_sjg_singleHH = log(quantity_norm/nest_quantity_norm_singleHH)

egen nest_quantity_norm_firsthalf = sum(quantity_normalized), by(store_code date cluster_ids_firsthalf)
gen ln_sjg_firsthalf = log(quantity_norm/nest_quantity_norm_firsthalf)

egen nest_quantity_norm_inside = sum(quantity_normalized), by(store_code date)
gen ln_sjg_inside = log(quantity_norm/nest_quantity_norm_inside)

* Estimate demand with single household nesting
ivreghdfe y_var (ln_sjg_singleHH price_normalized = num_prods num_prods_nest_singleHH z_hausman), absorb(upc_fe dma_brand_quarter)
gen alpha_nl_singleHH = _b[price_normalized]  
gen sigma_nl_singleHH = _b[ln_sjg] 
gen delta_nl_singleHH = y_var - _b[ln_sjg]*ln_sjg 

* Estimate demand with first half nesting
ivreghdfe y_var (ln_sjg_firsthalf price_normalized = num_prods num_prods_nest_firsthalf z_hausman), absorb(upc_fe dma_brand_quarter)
gen alpha_nl_firsthalf = _b[price_normalized]  
gen sigma_nl_firsthalf = _b[ln_sjg] 
gen delta_nl_firsthalf = y_var - _b[ln_sjg]*ln_sjg 

* Estimate demand with one nest for all products (standard logit)
ivreghdfe y_var (ln_sjg_inside price_normalized = num_prods z_hausman), absorb(upc_fe dma_brand_quarter)    
gen alpha_nl_inside = _b[price_normalized]  
gen sigma_nl_inside = _b[ln_sjg] 
gen delta_nl_inside = y_var - _b[ln_sjg]*ln_sjg 

* Estimate logit model (no nesting)
ivreghdfe y_var (price_normalized = num_prods z_hausman), absorb(upc_fe dma_brand_quarter)    
gen alpha_nl_logit = _b[price_normalized]  
gen delta_nl_logit = y_var 

* Estimate demand with brand nesting
egen nest_quantity_norm_brand = sum(quantity_normalized), by(store_code date brand_d)
gen ln_sjg_brand = log(quantity_norm/nest_quantity_norm_brand)

egen num_prods_nest_brand = count(price_norm), by(date store_c brand_descr)
ivreghdfe y_var (ln_sjg_brand price_normalized = num_prods num_prods_nest_brand z_hausman), absorb(upc_fe dma_brand_quarter)
gen alpha_nl_brand = _b[price_normalized]  
gen sigma_nl_brand = _b[ln_sjg] 
gen delta_nl_brand = y_var - _b[ln_sjg]*ln_sjg 
        
save "${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24_othclustering", replace
            
* Prepare data for supply model with different nesting structures
di "Preparing data for supply estimation with alternative nesting"
use "${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24_othclustering", clear

* Handle duplicates
duplicates tag upc upc_ver week store_code_uc, gen(dup)
egen sum_dup = sum(dup)

if sum_dup[1] != 0 {
    egen t_p_n = wtmean(price_normalized), by(upc upc_ver week store_code_uc) weight(quantity_normalized)
    egen t_s_n = sum(shares), by(upc upc_ver week store_code_uc) 
    egen t_q_n = sum(quantity_normalized), by(upc upc_ver week store_code_uc)
    replace price_normalized = t_p_n
    replace shares = t_s_n
    replace quantity_normalized = t_q_n 
    duplicates drop upc upc_ver week store_code_uc, force
}

* Create market identifiers
egen market_ids = group(store_code week_end)
    
* Add parent company information
merge m:1 product_module_code brand_descr using "${data_dir}/supplemental/brand_lookup.dta", gen(_merbrand)
drop if _merbrand == 2
replace parent_`yr' = -brand_code if _merbrand == 1

* Create firm and brand group identifiers
egen firm_ids = group(parent_`yr')
egen cluster_ids_brand = group(brand_descr)

* Create separate datasets for each nesting structure

preserve
* Main
rename price_normalized prices
rename shares shares
rename alpha_nl alpha
rename sigma_nl sigma
rename delta_nl delta
rename cluster_ids nesting_ids
format upc %16.0g

keep store_code week_end upc upc_ver market_ids firm_ids nesting_ids prices shares sigma alpha delta
outsheet using "${data_dir}/Demand_Results/costs_main_`pm'_`yr'.csv", comma replace
restore


preserve
* Single household nesting
rename price_normalized prices
rename shares shares
rename alpha_nl_singleHH alpha
rename sigma_nl_singleHH sigma
rename delta_nl_singleHH delta
rename cluster_ids_singleHH nesting_ids
format upc %16.0g

keep store_code week_end upc upc_ver market_ids firm_ids nesting_ids prices shares sigma alpha delta
outsheet using "${data_dir}/Demand_Results/costs_singleHH_`pm'_`yr'.csv", comma replace
restore

preserve
* First half nesting
rename price_normalized prices
rename shares shares
rename alpha_nl_firsthalf alpha
rename sigma_nl_firsthalf sigma
rename delta_nl_firsthalf delta
rename cluster_ids_firsthalf nesting_ids
format upc %16.0g

keep store_code week_end upc upc_ver market_ids firm_ids nesting_ids prices shares sigma alpha delta
outsheet using "${data_dir}/Demand_Results/costs_firsthalf_`pm'_`yr'.csv", comma replace
restore

preserve
* Inside good nesting (one nest)
rename price_normalized prices
rename shares shares
rename alpha_nl_inside alpha
rename sigma_nl_inside sigma
rename delta_nl_inside delta
gen nesting_ids = 1
format upc %16.0g

keep store_code week_end upc upc_ver market_ids firm_ids nesting_ids prices shares sigma alpha delta
outsheet using "${data_dir}/Demand_Results/costs_inside_`pm'_`yr'.csv", comma replace
restore

preserve
* Brand nesting
rename price_normalized prices
rename shares shares
rename alpha_nl_brand alpha
rename sigma_nl_brand sigma
rename delta_nl_brand delta
rename cluster_ids_brand nesting_ids
format upc %16.0g

keep store_code week_end upc upc_ver market_ids firm_ids nesting_ids prices shares sigma alpha delta
outsheet using "${data_dir}/Demand_Results/costs_brand_`pm'_`yr'.csv", comma replace
restore

preserve
* Logit model (no nesting)
rename price_normalized prices
rename shares shares
rename alpha_nl_logit alpha
rename delta_nl_logit delta
gen nesting_ids = 1
gen sigma = 0
format upc %16.0g

keep store_code week_end upc upc_ver market_ids firm_ids nesting_ids prices shares sigma alpha delta
outsheet using "${data_dir}/Demand_Results/costs_logit_`pm'_`yr'.csv", comma replace
restore

* Run Python scripts for all nesting structures
!python3 ${data_dir}/supplemental/computing_costs.py  `pm' `yr' "${data_dir}/Demand_Results/costs_main_"
!python3 ${data_dir}/supplemental/computing_costs.py  `pm' `yr' "${data_dir}/Demand_Results/costs_singleHH_"
!python3 ${data_dir}/supplemental/computing_costs.py  `pm' `yr' "${data_dir}/Demand_Results/costs_firsthalf_"
!python3 ${data_dir}/supplemental/computing_costs.py  `pm' `yr' "${data_dir}/Demand_Results/costs_inside_"
!python3 ${data_dir}/supplemental/computing_costs.py  `pm' `yr' "${data_dir}/Demand_Results/costs_brand_"
!python3 ${data_dir}/supplemental/computing_costs.py  `pm' `yr' "${data_dir}/Demand_Results/costs_logit_"



* Load supply estimates for different nesting structures
insheet using "${data_dir}/Demand_Results/costs_main_`pm'_`yr'.csv", clear
save "${data_dir}/Demand_Results/costs_`pm'_`yr'_main", replace
        
insheet using "${data_dir}/Demand_Results/costs_brand_`pm'_`yr'.csv", clear
save "${data_dir}/Demand_Results/costs_`pm'_`yr'_brand", replace

insheet using "${data_dir}/Demand_Results/costs_inside_`pm'_`yr'.csv", clear
save "${data_dir}/Demand_Results/costs_`pm'_`yr'_inside", replace    

insheet using "${data_dir}/Demand_Results/costs_singleHH_`pm'_`yr'.csv", clear
save "${data_dir}/Demand_Results/costs_`pm'_`yr'_singleHH", replace    

insheet using "${data_dir}/Demand_Results/costs_firsthalf_`pm'_`yr'.csv", clear
save "${data_dir}/Demand_Results/costs_`pm'_`yr'_firsthalf", replace        

insheet using "${data_dir}/Demand_Results/costs_logit_`pm'_`yr'.csv", clear
save "${data_dir}/Demand_Results/costs_`pm'_`yr'_logit", replace        

* Combine markups across nesting structures
use "${data_dir}/Demand_Results/costs_`pm'_`yr'_main", clear            
gen markups_main = (prices - costs)/prices
drop prices costs

* Add single household nesting markups
merge 1:1 upc store_code week_end using "${data_dir}/Demand_Results/costs_`pm'_`yr'_singleHH"
drop _merge
gen markups_singleHH = (prices - costs)/prices
drop prices costs

* Add first half nesting markups
merge 1:1 upc store_code week_end using "${data_dir}/Demand_Results/costs_`pm'_`yr'_firsthalf"        
drop _merge
gen markups_firsthalf = (prices - costs)/prices
drop prices costs

* Add brand nesting markups
merge 1:1 upc store_code week_end using "${data_dir}/Demand_Results/costs_`pm'_`yr'_brand"        
drop _merge
gen markups_brand = (prices - costs)/prices
drop prices costs

* Add one-nest markups
merge 1:1 upc store_code week_end using "${data_dir}/Demand_Results/costs_`pm'_`yr'_inside"        
drop _merge
gen markups_inside = (prices - costs)/prices
drop prices costs

* Add logit model markups
merge 1:1 upc store_code week_end using "${data_dir}/Demand_Results/costs_`pm'_`yr'_logit"        
drop _merge
gen markups_logit = (prices - costs)/prices
drop prices costs

egen mean_markups_main = mean(markups_main)
egen mean_markups_singleHH = mean(markups_singleHH)
egen mean_markups_firsthalf = mean(markups_firsthalf)
egen mean_markups_brand = mean(markups_brand)
egen mean_markups_inside = mean(markups_inside)
egen mean_markups_logit = mean(markups_logit)

gen product_module_code = 1344

keep product_module_code mean_markups_* 
duplicates drop
save "${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24_othclustering_mmarkups", replace

* Add mean markups to original data
use "${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24_othclustering", clear
drop _merge
merge m:1 product_module_code using "${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24_othclustering_mmarkups"            
save "${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24_othclustering", replace

* Prepare data for diversion analysis
use "${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24_othclustering", clear

* Create market identifiers
egen market_ids = group(store_code week_end)
    
* Add parent company information
merge m:1 product_module_code brand_descr using "${data_dir}/supplemental/brand_lookup.dta", gen(_merbrand)
drop if _merbrand == 2
replace parent_`yr' = -brand_code if _merbrand == 1

* Create firm and brand identifiers
egen firm_ids = group(parent_`yr')
egen cluster_ids_brand = group(brand_descr)

* Create product group for top cereals
egen revenue = sum(price_norm*quantity_norm), by(upc upc_ver)
bys upc upc_ver: gen ind = _n
gsort -ind -revenue
gen t_rank = _n
gsort ind brand_d -revenue
bys ind brand_d: gen t_brandrank = _n
replace t_brandrank = . if ind != 1 
egen brandrank = mean(t_brandrank), by(upc upc_ver)

* Keep only top brands for diversion analysis
preserve
keep if brandrank == 1
keep if inlist(brand_d, "G M CINNAMON TOAST C", "G M LUCKY CHARMS", "G M CHEERIOS", "KEL RICE KRISPIES") |  inlist(brand_d, "KEL CORN FLAKES", "KEL COCOA KRISPIES", "POST COCOA PEBBLES")

* Rename main model variables
rename alpha_nl alpha_nl_main
rename sigma_nl sigma_nl_main
rename ln_sjg ln_sjg_main
rename ln_sjg_firsthalf ln_sjg_firsthalf
rename cluster_ids cluster_ids_main

* Create variables for inside goods and logit models
gen cluster_ids_inside = 1
gen cluster_ids_logit = 1
gen sigma_nl_logit = 0
gen ln_sjg_logit = 0

* Define list of nesting structures
local nest_list = "main singleHH firsthalf brand inside logit"

* Create product identifiers
egen product_cereal = group(brand_d)
levelsof product_cereal, local(levels)

* Create market-level variables by product
foreach l of local levels {
    gen tshares = shares if product_cereal == `l'
    egen shares_`l' = mean(tshares), by(market_ids)
    drop tshares
    
    foreach x in `nest_list' {
        gen tshares = ln_sjg_`x' if product_cereal == `l'
        egen ln_sjg_`l'_`x' = mean(tshares), by(market_ids)
        gen tcluster = cluster_ids_`x' if product_cereal == `l'
        egen cluster_ids_`l'_`x' = mean(tcluster), by(market_ids)
        drop tshares tcluster
    }
}

* Calculate own-price derivatives for each nesting structure
foreach x in `nest_list' {
    gen opd_`x' = alpha_nl_`x' * shares * ((1/(1-sigma_nl_`x')) - (sigma_nl_`x'/(1-sigma_nl_`x'))*exp(ln_sjg_`x') - shares)
}        

* Calculate cross-price derivatives for each nesting structure
levelsof product_cereal, local(levels)
foreach l of local levels {
    foreach x in `nest_list' {
        gen cpd_`l'_`x' = -alpha_nl_`x' * shares * shares_`l'
        replace cpd_`l'_`x' = cpd_`l'_`x' - alpha_nl_`x' * shares * (sigma_nl_`x'/(1-sigma_nl_`x')) * exp(ln_sjg_`l'_`x')  if cluster_ids_`x' == cluster_ids_`l'_`x'  
    }    
}

* Calculate diversion ratios
levelsof product_cereal, local(levels)
foreach l of local levels {
    foreach m of local levels {
        foreach x in `nest_list' {
            gen dr_`x'_`l'_`m' = cpd_`m'_`x'/opd_`x' if product_cereal == `l' 
        }
    }
}

* Collapse to one observation
collapse (mean) dr_* mean_markups*

* Create Table 9: Diversion Ratios Under Alternative Nesting Structures
di "Generating Table 9"
file open table9 using "${table_dir}/Table9.tex", write replace
file write table9 "\tiny{\begin{minipage}{0.99\textwidth}"_n
file write table9 "\begin{tabular}{lrrrrrr}\toprule"_n

local v1 = string(round(mean_markups_main[1], 0.01))

file write table9 "\multicolumn{7}{l}{\textbf{Panel A: Main Clustering} (mean markup = `v1') }\\\midrule "_n 
file write table9 " & 1 &  2 &3  & 4 & 5 & 6  \tabularnewline  \midrule"_n

* Write diversion ratios for main nesting
local v1 = string(round(-dr_main_1_1[1], 0.001))
local v2 = string(round(-dr_main_1_5[1], 0.001))
local v3 = string(round(-dr_main_1_4[1], 0.001))
local v4 = string(round(-dr_main_1_2[1], 0.001))
local v5 = string(round(-dr_main_1_3[1], 0.001))
local v6 = string(round(-dr_main_1_6[1], 0.001))
file write table9 "1. CHEERIOS & - & `v2' & `v3' & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_main_5_1[1], 0.001))
local v2 = string(round(-dr_main_5_5[1], 0.001))
local v3 = string(round(-dr_main_5_4[1], 0.001))
local v4 = string(round(-dr_main_5_2[1], 0.001))
local v5 = string(round(-dr_main_5_3[1], 0.001))
local v6 = string(round(-dr_main_5_6[1], 0.001))
file write table9 "2. RICE KRISPIES & `v1' & - & `v3' & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_main_4_1[1], 0.001))
local v2 = string(round(-dr_main_4_5[1], 0.001))
local v3 = string(round(-dr_main_4_4[1], 0.001))
local v4 = string(round(-dr_main_4_2[1], 0.001))
local v5 = string(round(-dr_main_4_3[1], 0.001))
local v6 = string(round(-dr_main_4_6[1], 0.001))
file write table9 "3. CORN FLAKES & `v1' & `v2' & - & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_main_2_1[1], 0.001))
local v2 = string(round(-dr_main_2_5[1], 0.001))
local v3 = string(round(-dr_main_2_4[1], 0.001))
local v4 = string(round(-dr_main_2_2[1], 0.001))
local v5 = string(round(-dr_main_2_3[1], 0.001))
local v6 = string(round(-dr_main_2_6[1], 0.001))
file write table9 "4. LUCKY CHARMS & `v1' & `v2' & `v3' & -& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_main_3_1[1], 0.001))
local v2 = string(round(-dr_main_3_5[1], 0.001))
local v3 = string(round(-dr_main_3_4[1], 0.001))
local v4 = string(round(-dr_main_3_2[1], 0.001))
local v5 = string(round(-dr_main_3_3[1], 0.001))
local v6 = string(round(-dr_main_3_6[1], 0.001))
file write table9 "5. COCOA KRISPIES & `v1' & `v2' & `v3' & `v4' & - & `v6' \\ "_n

local v1 = string(round(-dr_main_6_1[1], 0.001))
local v2 = string(round(-dr_main_6_5[1], 0.001))
local v3 = string(round(-dr_main_6_4[1], 0.001))
local v4 = string(round(-dr_main_6_2[1], 0.001))
local v5 = string(round(-dr_main_6_3[1], 0.001))
local v6 = string(round(-dr_main_6_6[1], 0.001))
file write table9 "6. COCOA PEBBLES & `v1' & `v2' & `v3' & `v4' & `v5' & - \\ "_n

file write table9 "\bottomrule\end{tabular}\end{minipage}"_n


* Write diversion ratios for single household nesting

local v1 = string(round(mean_markups_singleHH[1], 0.01))

file write table9 "\begin{minipage}{0.9\textwidth}\begin{tabular}{lrrrrrr}\toprule\multicolumn{7}{l}{\textbf{Panel B: Single Household Clustering} (mean markup = `v1')}\\\midrule "_n 
file write table9 " & 1 & 2 & 3 & 4 & 5 & 6 \tabularnewline  \midrule"_n

local v1 = string(round(-dr_singleHH_1_1[1], 0.001))
local v2 = string(round(-dr_singleHH_1_5[1], 0.001))
local v3 = string(round(-dr_singleHH_1_4[1], 0.001))
local v4 = string(round(-dr_singleHH_1_2[1], 0.001))
local v5 = string(round(-dr_singleHH_1_3[1], 0.001))
local v6 = string(round(-dr_singleHH_1_6[1], 0.001))
file write table9 "1. CHEERIOS & - & `v2' & `v3' & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_singleHH_5_1[1], 0.001))
local v2 = string(round(-dr_singleHH_5_5[1], 0.001))
local v3 = string(round(-dr_singleHH_5_4[1], 0.001))
local v4 = string(round(-dr_singleHH_5_2[1], 0.001))
local v5 = string(round(-dr_singleHH_5_3[1], 0.001))
local v6 = string(round(-dr_singleHH_5_6[1], 0.001))
file write table9 "2. RICE KRISPIES & `v1' & - & `v3' & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_singleHH_4_1[1], 0.001))
local v2 = string(round(-dr_singleHH_4_5[1], 0.001))
local v3 = string(round(-dr_singleHH_4_4[1], 0.001))
local v4 = string(round(-dr_singleHH_4_2[1], 0.001))
local v5 = string(round(-dr_singleHH_4_3[1], 0.001))
local v6 = string(round(-dr_singleHH_4_6[1], 0.001))
file write table9 "3. CORN FLAKES & `v1' & `v2' & - & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_singleHH_2_1[1], 0.001))
local v2 = string(round(-dr_singleHH_2_5[1], 0.001))
local v3 = string(round(-dr_singleHH_2_4[1], 0.001))
local v4 = string(round(-dr_singleHH_2_2[1], 0.001))
local v5 = string(round(-dr_singleHH_2_3[1], 0.001))
local v6 = string(round(-dr_singleHH_2_6[1], 0.001))
file write table9 "4. LUCKY CHARMS & `v1' & `v2' & `v3' & -& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_singleHH_3_1[1], 0.001))
local v2 = string(round(-dr_singleHH_3_5[1], 0.001))
local v3 = string(round(-dr_singleHH_3_4[1], 0.001))
local v4 = string(round(-dr_singleHH_3_2[1], 0.001))
local v5 = string(round(-dr_singleHH_3_3[1], 0.001))
local v6 = string(round(-dr_singleHH_3_6[1], 0.001))
file write table9 "5. COCOA KRISPIES & `v1' & `v2' & `v3' & `v4' & - & `v6' \\ "_n

local v1 = string(round(-dr_singleHH_6_1[1], 0.001))
local v2 = string(round(-dr_singleHH_6_5[1], 0.001))
local v3 = string(round(-dr_singleHH_6_4[1], 0.001))
local v4 = string(round(-dr_singleHH_6_2[1], 0.001))
local v5 = string(round(-dr_singleHH_6_3[1], 0.001))
local v6 = string(round(-dr_singleHH_6_6[1], 0.001))
file write table9 "6. COCOA PEBBLES & `v1' & `v2' & `v3' & `v4' & `v5' & - \\ "_n

file write table9 "\bottomrule\end{tabular}\end{minipage}"_n

* Write diversion ratios for first half nesting

local v1 = string(round(mean_markups_firsthalf[1], 0.01))

file write table9 "\begin{minipage}{0.9\textwidth}\begin{tabular}{lrrrrrr}\toprule\multicolumn{7}{l}{\textbf{Panel C: First Half Clustering} (mean markup = `v1')}\\\midrule "_n 
file write table9 " & 1 & 2 & 3 & 4 & 5 & 6 \tabularnewline  \midrule"_n

local v1 = string(round(-dr_firsthalf_1_1[1], 0.001))
local v2 = string(round(-dr_firsthalf_1_5[1], 0.001))
local v3 = string(round(-dr_firsthalf_1_4[1], 0.001))
local v4 = string(round(-dr_firsthalf_1_2[1], 0.001))
local v5 = string(round(-dr_firsthalf_1_3[1], 0.001))
local v6 = string(round(-dr_firsthalf_1_6[1], 0.001))
file write table9 "1. CHEERIOS & - & `v2' & `v3' & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_firsthalf_5_1[1], 0.001))
local v2 = string(round(-dr_firsthalf_5_5[1], 0.001))
local v3 = string(round(-dr_firsthalf_5_4[1], 0.001))
local v4 = string(round(-dr_firsthalf_5_2[1], 0.001))
local v5 = string(round(-dr_firsthalf_5_3[1], 0.001))
local v6 = string(round(-dr_firsthalf_5_6[1], 0.001))
file write table9 "2. RICE KRISPIES & `v1' & - & `v3' & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_firsthalf_4_1[1], 0.001))
local v2 = string(round(-dr_firsthalf_4_5[1], 0.001))
local v3 = string(round(-dr_firsthalf_4_4[1], 0.001))
local v4 = string(round(-dr_firsthalf_4_2[1], 0.001))
local v5 = string(round(-dr_firsthalf_4_3[1], 0.001))
local v6 = string(round(-dr_firsthalf_4_6[1], 0.001))
file write table9 "3. CORN FLAKES & `v1' & `v2' & - & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_firsthalf_2_1[1], 0.001))
local v2 = string(round(-dr_firsthalf_2_5[1], 0.001))
local v3 = string(round(-dr_firsthalf_2_4[1], 0.001))
local v4 = string(round(-dr_firsthalf_2_2[1], 0.001))
local v5 = string(round(-dr_firsthalf_2_3[1], 0.001))
local v6 = string(round(-dr_firsthalf_2_6[1], 0.001))
file write table9 "4. LUCKY CHARMS & `v1' & `v2' & `v3' & -& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_firsthalf_3_1[1], 0.001))
local v2 = string(round(-dr_firsthalf_3_5[1], 0.001))
local v3 = string(round(-dr_firsthalf_3_4[1], 0.001))
local v4 = string(round(-dr_firsthalf_3_2[1], 0.001))
local v5 = string(round(-dr_firsthalf_3_3[1], 0.001))
local v6 = string(round(-dr_firsthalf_3_6[1], 0.001))
file write table9 "5. COCOA KRISPIES & `v1' & `v2' & `v3' & `v4' & - & `v6' \\ "_n

local v1 = string(round(-dr_firsthalf_6_1[1], 0.001))
local v2 = string(round(-dr_firsthalf_6_5[1], 0.001))
local v3 = string(round(-dr_firsthalf_6_4[1], 0.001))
local v4 = string(round(-dr_firsthalf_6_2[1], 0.001))
local v5 = string(round(-dr_firsthalf_6_3[1], 0.001))
local v6 = string(round(-dr_firsthalf_6_6[1], 0.001))
file write table9 "6. COCOA PEBBLES & `v1' & `v2' & `v3' & `v4' & `v5' & - \\ "_n


file write table9 "\bottomrule\end{tabular}\end{minipage}"_n

* Write diversion ratios for brand nesting
local v1 = string(round(mean_markups_brand[1], 0.01))

file write table9 "\begin{minipage}{0.9\textwidth}\begin{tabular}{lrrrrrr}\toprule\multicolumn{7}{l}{\textbf{Panel D: Brand Clustering} (mean markup = `v1')}\\\midrule "_n 
file write table9 " & 1 & 2 & 3 & 4 & 5 & 6 \tabularnewline  \midrule"_n


local v1 = string(round(-dr_brand_1_1[1], 0.001))
local v2 = string(round(-dr_brand_1_5[1], 0.001))
local v3 = string(round(-dr_brand_1_4[1], 0.001))
local v4 = string(round(-dr_brand_1_2[1], 0.001))
local v5 = string(round(-dr_brand_1_3[1], 0.001))
local v6 = string(round(-dr_brand_1_6[1], 0.001))
file write table9 "1. CHEERIOS & - & `v2' & `v3' & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_brand_5_1[1], 0.001))
local v2 = string(round(-dr_brand_5_5[1], 0.001))
local v3 = string(round(-dr_brand_5_4[1], 0.001))
local v4 = string(round(-dr_brand_5_2[1], 0.001))
local v5 = string(round(-dr_brand_5_3[1], 0.001))
local v6 = string(round(-dr_brand_5_6[1], 0.001))
file write table9 "2. RICE KRISPIES & `v1' & - & `v3' & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_brand_4_1[1], 0.001))
local v2 = string(round(-dr_brand_4_5[1], 0.001))
local v3 = string(round(-dr_brand_4_4[1], 0.001))
local v4 = string(round(-dr_brand_4_2[1], 0.001))
local v5 = string(round(-dr_brand_4_3[1], 0.001))
local v6 = string(round(-dr_brand_4_6[1], 0.001))
file write table9 "3. CORN FLAKES & `v1' & `v2' & - & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_brand_2_1[1], 0.001))
local v2 = string(round(-dr_brand_2_5[1], 0.001))
local v3 = string(round(-dr_brand_2_4[1], 0.001))
local v4 = string(round(-dr_brand_2_2[1], 0.001))
local v5 = string(round(-dr_brand_2_3[1], 0.001))
local v6 = string(round(-dr_brand_2_6[1], 0.001))
file write table9 "4. LUCKY CHARMS & `v1' & `v2' & `v3' & -& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_brand_3_1[1], 0.001))
local v2 = string(round(-dr_brand_3_5[1], 0.001))
local v3 = string(round(-dr_brand_3_4[1], 0.001))
local v4 = string(round(-dr_brand_3_2[1], 0.001))
local v5 = string(round(-dr_brand_3_3[1], 0.001))
local v6 = string(round(-dr_brand_3_6[1], 0.001))
file write table9 "5. COCOA KRISPIES & `v1' & `v2' & `v3' & `v4' & - & `v6' \\ "_n

local v1 = string(round(-dr_brand_6_1[1], 0.001))
local v2 = string(round(-dr_brand_6_5[1], 0.001))
local v3 = string(round(-dr_brand_6_4[1], 0.001))
local v4 = string(round(-dr_brand_6_2[1], 0.001))
local v5 = string(round(-dr_brand_6_3[1], 0.001))
local v6 = string(round(-dr_brand_6_6[1], 0.001))
file write table9 "6. COCOA PEBBLES & `v1' & `v2' & `v3' & `v4' & `v5' & - \\ "_n
file write table9 "\bottomrule\end{tabular}\end{minipage}"_n

* Write diversion ratios for inside goods nesting

local v1 = string(round(mean_markups_inside[1], 0.01))

file write table9 "\begin{minipage}{0.9\textwidth}\begin{tabular}{lrrrrrr}\toprule\multicolumn{7}{l}{\textbf{Panel E: Inside/Outside Clustering} (mean markup = `v1')}\\\midrule "_n 
file write table9 " & 1 & 2 & 3 & 4 & 5 & 6 \tabularnewline  \midrule"_n

local v1 = string(round(-dr_inside_1_1[1], 0.001))
local v2 = string(round(-dr_inside_1_5[1], 0.001))
local v3 = string(round(-dr_inside_1_4[1], 0.001))
local v4 = string(round(-dr_inside_1_2[1], 0.001))
local v5 = string(round(-dr_inside_1_3[1], 0.001))
local v6 = string(round(-dr_inside_1_6[1], 0.001))
file write table9 "1. CHEERIOS & - & `v2' & `v3' & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_inside_5_1[1], 0.001))
local v2 = string(round(-dr_inside_5_5[1], 0.001))
local v3 = string(round(-dr_inside_5_4[1], 0.001))
local v4 = string(round(-dr_inside_5_2[1], 0.001))
local v5 = string(round(-dr_inside_5_3[1], 0.001))
local v6 = string(round(-dr_inside_5_6[1], 0.001))
file write table9 "2. RICE KRISPIES & `v1' & - & `v3' & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_inside_4_1[1], 0.001))
local v2 = string(round(-dr_inside_4_5[1], 0.001))
local v3 = string(round(-dr_inside_4_4[1], 0.001))
local v4 = string(round(-dr_inside_4_2[1], 0.001))
local v5 = string(round(-dr_inside_4_3[1], 0.001))
local v6 = string(round(-dr_inside_4_6[1], 0.001))
file write table9 "3. CORN FLAKES & `v1' & `v2' & - & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_inside_2_1[1], 0.001))
local v2 = string(round(-dr_inside_2_5[1], 0.001))
local v3 = string(round(-dr_inside_2_4[1], 0.001))
local v4 = string(round(-dr_inside_2_2[1], 0.001))
local v5 = string(round(-dr_inside_2_3[1], 0.001))
local v6 = string(round(-dr_inside_2_6[1], 0.001))
file write table9 "4. LUCKY CHARMS & `v1' & `v2' & `v3' & -& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_inside_3_1[1], 0.001))
local v2 = string(round(-dr_inside_3_5[1], 0.001))
local v3 = string(round(-dr_inside_3_4[1], 0.001))
local v4 = string(round(-dr_inside_3_2[1], 0.001))
local v5 = string(round(-dr_inside_3_3[1], 0.001))
local v6 = string(round(-dr_inside_3_6[1], 0.001))
file write table9 "5. COCOA KRISPIES & `v1' & `v2' & `v3' & `v4' & - & `v6' \\ "_n

local v1 = string(round(-dr_inside_6_1[1], 0.001))
local v2 = string(round(-dr_inside_6_5[1], 0.001))
local v3 = string(round(-dr_inside_6_4[1], 0.001))
local v4 = string(round(-dr_inside_6_2[1], 0.001))
local v5 = string(round(-dr_inside_6_3[1], 0.001))
local v6 = string(round(-dr_inside_6_6[1], 0.001))
file write table9 "6. COCOA PEBBLES & `v1' & `v2' & `v3' & `v4' & `v5' & - \\ "_n

file write table9 "\bottomrule\end{tabular}\end{minipage}"_n

* Write diversion ratios for logit model

local v1 = string(round(mean_markups_logit[1], 0.01))

file write table9 "\begin{minipage}{0.9\textwidth}\begin{tabular}{lrrrrrr}\toprule\multicolumn{7}{l}{\textbf{Panel F: No Clustering (Logit)} (mean markup = `v1')}\\\midrule "_n 
file write table9 " & 1 & 2 & 3 & 4 & 5 & 6 \tabularnewline  \midrule"_n

local v1 = string(round(-dr_logit_1_1[1], 0.001))
local v2 = string(round(-dr_logit_1_5[1], 0.001))
local v3 = string(round(-dr_logit_1_4[1], 0.001))
local v4 = string(round(-dr_logit_1_2[1], 0.001))
local v5 = string(round(-dr_logit_1_3[1], 0.001))
local v6 = string(round(-dr_logit_1_6[1], 0.001))
file write table9 "1. CHEERIOS & - & `v2' & `v3' & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_logit_5_1[1], 0.001))
local v2 = string(round(-dr_logit_5_5[1], 0.001))
local v3 = string(round(-dr_logit_5_4[1], 0.001))
local v4 = string(round(-dr_logit_5_2[1], 0.001))
local v5 = string(round(-dr_logit_5_3[1], 0.001))
local v6 = string(round(-dr_logit_5_6[1], 0.001))
file write table9 "2. RICE KRISPIES & `v1' & - & `v3' & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_logit_4_1[1], 0.001))
local v2 = string(round(-dr_logit_4_5[1], 0.001))
local v3 = string(round(-dr_logit_4_4[1], 0.001))
local v4 = string(round(-dr_logit_4_2[1], 0.001))
local v5 = string(round(-dr_logit_4_3[1], 0.001))
local v6 = string(round(-dr_logit_4_6[1], 0.001))
file write table9 "3. CORN FLAKES & `v1' & `v2' & - & `v4'& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_logit_2_1[1], 0.001))
local v2 = string(round(-dr_logit_2_5[1], 0.001))
local v3 = string(round(-dr_logit_2_4[1], 0.001))
local v4 = string(round(-dr_logit_2_2[1], 0.001))
local v5 = string(round(-dr_logit_2_3[1], 0.001))
local v6 = string(round(-dr_logit_2_6[1], 0.001))
file write table9 "4. LUCKY CHARMS & `v1' & `v2' & `v3' & -& `v5' & `v6' \\ "_n

local v1 = string(round(-dr_logit_3_1[1], 0.001))
local v2 = string(round(-dr_logit_3_5[1], 0.001))
local v3 = string(round(-dr_logit_3_4[1], 0.001))
local v4 = string(round(-dr_logit_3_2[1], 0.001))
local v5 = string(round(-dr_logit_3_3[1], 0.001))
local v6 = string(round(-dr_logit_3_6[1], 0.001))
file write table9 "5. COCOA KRISPIES & `v1' & `v2' & `v3' & `v4' & - & `v6' \\ "_n

local v1 = string(round(-dr_logit_6_1[1], 0.001))
local v2 = string(round(-dr_logit_6_5[1], 0.001))
local v3 = string(round(-dr_logit_6_4[1], 0.001))
local v4 = string(round(-dr_logit_6_2[1], 0.001))
local v5 = string(round(-dr_logit_6_3[1], 0.001))
local v6 = string(round(-dr_logit_6_6[1], 0.001))
file write table9 "6. COCOA PEBBLES & `v1' & `v2' & `v3' & `v4' & `v5' & - \\ "_n
file write table9 "\bottomrule\end{tabular}\end{minipage}"_n
file write table9 "}"_n
file close table9


/***************************************************************************
* 8.4 HOUSEHOLD PANEL REPRESENTATION 
* - Analyzes expenditure patterns of Nielsen household panel
* - Creates Tables 12-13 showing panel representation
***************************************************************************/

* Record household total expenditure and trips by year
foreach yr of numlist `year_min'/`year_max' {
    di "Processing household expenditure for year `yr'"
    !gunzip -c $nielsen_panel_dir/`yr'/Annual_Files/trips_`yr'.tsv.gz > ${data_dir}/Nielsen/panel/trips_`yr'.tsv
    import delimited ${data_dir}/Nielsen/panel/trips_`yr'.tsv, clear
    
    * Flag stores in our sample
    merge m:1 store_code using ${data_dir}/Nielsen/movement/stores_subset
    drop if _merge == 2
    gen in_sample = _merge == 3
    drop _merge
    
    * Flag stores in scanner data
    merge m:1 store_code using ${data_dir}/Nielsen/stores/stores_`yr'.dta
    drop if _merge == 2
    gen in_scanner = _merge == 3
    drop _merge
    
    * Calculate expenditure by store type
    egen expenditure_total = sum(total_spent), by(household_code)
    egen expenditure_scanner = sum(total_spent) if in_scanner == 1, by(household_code)
    egen expenditure_sample = sum(total_spent) if in_sample == 1, by(household_code)
    
    * Calculate trips by store type
    egen trips_total = count(total_spent), by(household_code)
    egen trips_scanner = count(total_spent) if in_scanner == 1, by(household_code)
    egen trips_sample = count(total_spent) if in_sample == 1, by(household_code)
    
    * Keep household-level data
    duplicates drop household_code in_* expenditure_* trips_*, force
    keep household_code in_* expenditure_* trips_*
    save ${data_dir}/Nielsen/panel/trips_`yr'_hhexpenditure, replace
    !rm ${data_dir}/Nielsen/panel/trips_`yr'.tsv
}

* Merge household expenditure and trips to panelist file to get household income
foreach yr of numlist `year_min'/`year_max' {
    di "Merging with panelist data for year `yr'"
    import delimited ${data_dir}/Nielsen/panel/panelists_`yr'.tsv, clear
    rename household_cd household_code
    merge 1:m household_code using ${data_dir}/Nielsen/panel/trips_`yr'_hhexpenditure
    save ${data_dir}/Nielsen/panel/panelists_`yr'_hhexpenditure, replace
}    
    
* Combine panelist data across years
clear
foreach yr of numlist `year_min'/`year_max' {
    append using ${data_dir}/Nielsen/panel/panelists_`yr'_hhexpenditure
    !rm ${data_dir}/Nielsen/panel/panelists_`yr'.tsv
}        

* Generate statistics for Tables 12-13
gen texpenditure_sample = expenditure_sample if in_sample == 1
egen expenditure_sample_all = mean(texpenditure_sample), by(household_code panel_year)
gen texpenditure_scanner = expenditure_scanner if in_scanner == 1
egen expenditure_scanner_all = mean(texpenditure_scanner), by(household_code panel_year)
replace expenditure_scanner_all = 0 if expenditure_scanner_all == .
replace expenditure_sample_all = 0 if expenditure_sample_all == .

* Calculate fraction of expenditure in sample and scanner stores
gen frac_exp_sample = expenditure_sample_all/expenditure_total 
gen frac_exp_scanner = expenditure_scanner_all/expenditure_total 

* Generate trip statistics
gen ttrips_sample = trips_sample if in_sample == 1
egen trips_sample_all = mean(ttrips_sample), by(household_code panel_year)
gen ttrips_scanner = trips_scanner if in_scanner == 1
egen trips_scanner_all = mean(ttrips_scanner), by(household_code panel_year)
replace trips_scanner_all = 0 if trips_scanner_all == .
replace trips_sample_all = 0 if trips_sample_all == .

* Calculate fraction of trips in sample and scanner stores
gen frac_trp_sample = trips_sample_all/trips_total 
gen frac_trp_scanner = trips_scanner_all/trips_total 

* Keep one observation per household per year
bys household_code panel_year: gen hh_ind = _n == 1
drop if hh_ind != 1

* Create income group indicator
gen inc_below = household_income < 19    

* Calculate mean fractions by year
gen mean_frac_trips_scanner = .
gen mean_frac_trips_sample = .
gen mean_frac_exp_scanner = .
gen mean_frac_exp_sample = .

foreach yr of numlist `year_min_panel'/`year_max_panel' {
    sum frac_trp_scanner if panel_year == `yr'
    replace mean_frac_trips_scanner = r(mean) if panel_year == `yr'
    sum frac_trp_sample if panel_year == `yr'
    replace mean_frac_trips_sample = r(mean) if panel_year == `yr'
    sum frac_exp_scanner if panel_year == `yr'
    replace mean_frac_exp_scanner = r(mean) if panel_year == `yr'
    sum frac_exp_sample if panel_year == `yr'
    replace mean_frac_exp_sample = r(mean) if panel_year == `yr'
}

* Calculate statistics by income group
foreach x of numlist 0/1 {
    gen mean_frac_trips_scanner_`x' = .
    gen mean_frac_trips_sample_`x' = .
    gen mean_frac_exp_scanner_`x' = .
    gen mean_frac_exp_sample_`x' = .
    
    foreach yr of numlist `year_min_panel'/`year_max_panel' {
        sum frac_trp_scanner if panel_year == `yr' & inc_below == `x'
        replace mean_frac_trips_scanner_`x' = r(mean) if panel_year == `yr' 
        sum frac_trp_sample if panel_year == `yr' & inc_below == `x'
        replace mean_frac_trips_sample_`x' = r(mean) if panel_year == `yr' 
        sum frac_exp_scanner if panel_year == `yr' & inc_below == `x'
        replace mean_frac_exp_scanner_`x' = r(mean) if panel_year == `yr' 
        sum frac_exp_sample if panel_year == `yr' & inc_below == `x'
        replace mean_frac_exp_sample_`x' = r(mean) if panel_year == `yr' 
    } 
}

* Keep one observation per year
duplicates drop panel_year, force

* Table 12: Fraction of Household Expenditure by Store Type
di "Generating Table 12"
sort panel_year
file open tableA12 using "${table_dir}/TableA12_hhexp.tex", write replace
file write tableA12 "\setlength{\tabcolsep}{14.1pt}"_n 
file write tableA12 " \begin{tabular}{ccccccc}\\\toprule"_n 
file write tableA12 " & \multicolumn{3}{c}{In Scanner Data} & \multicolumn{3}{c}{In Sample}   \tabularnewline"_n 
file write tableA12 " \cmidrule(lr){2-4} \cmidrule(lr){5-7}"
file write tableA12 " & & \multicolumn{2}{c}{Household Income} &  &  \multicolumn{2}{c}{Household Income} \tabularnewline  "_n 
file write tableA12 " \cmidrule(lr){3-4} \cmidrule(lr){6-7} "_n 
file write tableA12 " Year  & All &  Below 50K & Above 50K & All &  Below 50K & Above 50K  \tabularnewline"_n 
file write tableA12 " \midrule"_n 
file close tableA12

* Write year-by-year expenditure statistics
local N = _N
forvalues i = 1(1)`N' {
    local yr = panel_year[`i']
    local v1 = (round(mean_frac_exp_scanner[`i'], 0.01))
    local v2 = (round(mean_frac_exp_scanner_1[`i'], 0.01))
    local v3 = (round(mean_frac_exp_scanner_0[`i'], 0.01))
    local v4 = (round(mean_frac_exp_sample[`i'], 0.01))
    local v5 = (round(mean_frac_exp_sample_1[`i'], 0.01))
    local v6 = (round(mean_frac_exp_sample_0[`i'], 0.01))
    
    local fmtv1 : display %4.2f `v1'
    local fmtv2 : display %4.2f `v2'
    local fmtv3 : display %4.2f `v3'
    local fmtv4 : display %4.2f `v4'
    local fmtv5 : display %4.2f `v5'
    local fmtv6 : display %4.2f `v6'
    
    file open tableA12 using "${table_dir}/TableA12_hhexp.tex", write append
    file write tableA12 "`yr'& `fmtv1' & `fmtv2' & `fmtv3' & `fmtv4'& `fmtv5' & `fmtv6' \\ "_n 
    file close tableA12
}

file open tableA12 using "${table_dir}/TableA12_hhexp.tex", write append
file write tableA12 "\bottomrule"_n 
file write tableA12 "\end{tabular}"
file close tableA12    

* Table 13: Fraction of Household Trips by Store Type
di "Generating Table 13"
sort panel_year
file open tableA13 using "${table_dir}/TableA13_hhtrp.tex", write replace
file write tableA13 "\setlength{\tabcolsep}{14.1pt}"_n 
file write tableA13 " \begin{tabular}{ccccccc}\\\toprule"_n 
file write tableA13 " & \multicolumn{3}{c}{In Scanner Data} & \multicolumn{3}{c}{In Sample}   \tabularnewline"_n 
file write tableA13 " \cmidrule(lr){2-4} \cmidrule(lr){5-7}"
file write tableA13 " & & \multicolumn{2}{c}{Household Income} &  &  \multicolumn{2}{c}{Household Income} \tabularnewline  "_n 
file write tableA13 " \cmidrule(lr){3-4} \cmidrule(lr){6-7} "_n 
file write tableA13 " Year  & All &  Below 50K & Above 50K & All &  Below 50K & Above 50K  \tabularnewline"_n 
file write tableA13 " \midrule"_n 
file close tableA13

* Write year-by-year trip statistics
local N = _N
forvalues i = 1(1)`N' {
    local yr = panel_year[`i']
    local v1 = (round(mean_frac_trips_scanner[`i'], 0.01))
    local v2 = (round(mean_frac_trips_scanner_1[`i'], 0.01))
    local v3 = (round(mean_frac_trips_scanner_0[`i'], 0.01))
    local v4 = (round(mean_frac_trips_sample[`i'], 0.01))
    local v5 = (round(mean_frac_trips_sample_1[`i'], 0.01))
    local v6 = (round(mean_frac_trips_sample_0[`i'], 0.01))
    
    local fmtv1 : display %4.2f `v1'
    local fmtv2 : display %4.2f `v2'
    local fmtv3 : display %4.2f `v3'
    local fmtv4 : display %4.2f `v4'
    local fmtv5 : display %4.2f `v5'
    local fmtv6 : display %4.2f `v6'
    
    file open tableA13 using "${table_dir}/TableA13_hhtrp.tex", write append
    file write tableA13 "`yr'& `fmtv1' & `fmtv2' & `fmtv3' & `fmtv4'& `fmtv5' & `fmtv6' \\ "_n 
    file close tableA13
}

file open tableA13 using "${table_dir}/TableA13_hhtrp.tex", write append
file write tableA13 "\bottomrule"_n 
file write tableA13 "\end{tabular}"
file close tableA13    


/***************************************************************************
* 8.5 OWNERSHIP CONCENTRATION ANALYSIS (FIGURE 10)
* - Analyzes changes in ownership concentration over time
* - Creates Figure 10 showing HHI comparison
***************************************************************************/

local module_list "1030 1044 1050 1105 1114 1120 1177 1257 1290 1293 1320 1323 1326 1327 1330 1331 1334 1340 1341 1344 1360 1362 1425 1441 1452 1455 1461 1463 1484 1487 1493 1508 1532 1553 2621 2623 2624 2629 2631 2634 2672 2675 3550 3561 3572 3574 3576 3577 3578 3583 3590 3592 3603 3605 3618 3625 4000 4001 4002 4003 4004 4005 4006 5000 5010 5050 7012 7260 7340 7734"

local year_list "2018"
foreach pm of numlist `module_list' {    
    foreach yr of numlist `year_list' {    
        di "Calculating HHI for module `pm', year `yr'"
        use ${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24.dta, clear    
        
        * Add parent company information
        merge m:1 product_module_code brand_descr using "${data_dir}/supplemental/brand_lookup.dta", gen(_merge_brand)
        drop if _merge_brand == 2
        replace parent_2018 = -brand_code if _merge_brand == 1
        replace parent_2006 = -brand_code if _merge_brand == 1
        
        * Calculate revenue shares and HHI
        egen product_revenue = sum(quantity * price), by(upc upc_ver)
        egen firm_revenue_2018 = sum(quantity * price), by(parent_2018)
        egen firm_revenue_2006 = sum(quantity * price), by(parent_2006)
        egen total_revenue = sum(quantity * price)
        
        * Calculate squared market shares
        gen r2_18 = (firm_revenue_2018/total_revenue)^2
        gen r2_06 = (firm_revenue_2006/total_revenue)^2
        bys parent_2018: gen rank_2018 = _n
        bys parent_2006: gen rank_2006 = _n
        
        * Sum HHI components
        egen tHHI_18 = sum(r2_18) if rank_2018 == 1
        egen tHHI_06 = sum(r2_06) if rank_2006 == 1
        
        egen HHI_18 = mean(tHHI_18)
        egen HHI_06 = mean(tHHI_06)
        
         * Keep module-level HHI
        keep if _n == 1
        keep product_module_code product_module_d HHI*
        save ${data_dir}/Demand_Results/HHI_`pm'_`yr', replace
    }
}

* Combine HHI statistics across modules
clear

local year_list "2018"
foreach pm of numlist `module_list' {    
    foreach yr of numlist `year_list' {    
        append using ${data_dir}/Demand_Results/HHI_`pm'_`yr'
    }
}

* Create Figure 10: HHI Comparison
di "Generating Figure 10"

graph twoway (scatter HHI_06 HHI_18 ,  msymbol("x") mcolor("black")) , xlabel(0 "{stSerif:0}" .2 "{stSerif:.2}" .4 "{stSerif:.4}" .6 "{stSerif:.6}" .8 "{stSerif:.8}" 1 "{stSerif:1}", nogrid labsize(medsmall)) ylabel(0 "{stSerif:0}" .2 "{stSerif:.2}" .4 "{stSerif:.4}" .6 "{stSerif:.6}" .8 "{stSerif:.8}" 1 "{stSerif:1}", nogrid labsize(medsmall))  xtitle("{stSerif:HHI Based on 2018 Ownership}",size(medsmall))  ytitle("{stSerif:HHI Based on 2006 Ownership}", size(medsmall)) aspectratio(.8) legend(off)
graph export ${table_dir}/hhi_in_2006_vs_hhi_in_2018.png, replace
graph export ${table_dir}/hhi_in_2006_vs_hhi_in_2018.eps, replace





/***************************************************************************
* 8.6 SHRINKFLATION ANALYSIS
* - Examines changes in package sizes for the same products over time
* - Quantifies the extent of "shrinkflation"
***************************************************************************/

* Reset the module list to only include 70 modules with valid demand parameters
local module_list "1030 1044 1050 1105 1114 1120 1177 1257 1290 1293 1320 1323 1326 1327 1330 1331 1334 1340 1341 1344 1360 1362 1425 1441 1452 1455 1461 1463 1484 1487 1493 1508 1532 1553 2621 2623 2624 2629 2631 2634 2672 2675 3550 3561 3572 3574 3576 3577 3578 3583 3590 3592 3603 3605 3618 3625 4000 4001 4002 4003 4004 4005 4006 5000 5010 5050 7012 7260 7340 7734"



* Process shrinkflation for each module group
foreach pm of numlist `module_list' {
    di "Analyzing shrinkflation for module `pm'"
    
    * Load 2006 and 2018 data
    use "${data_dir}/Demand_Results/NL_`pm'_2006_05_24", clear
    append using "${data_dir}/Demand_Results/NL_`pm'_2018_05_24"
    drop _merge
    
    * Calculate total quantity by UPC and year
    egen tot_q = sum(quantity), by(year upc upc_ver)
    duplicates drop year upc upc_ver, force
    
    * Get UPC rankings within brand
    bys brand_d year: gen q_rank = _n
    
    * Create variables for analysis
    egen myear = mean(year), by(upc upc_ver)
    egen upc_count = count(upc), by(year)
    egen upc_count_myear = count(upc), by(myear)
    
    * Filter and clean data
    drop if myear == 2012
    replace upc_descr = strtrim(upc_descr)
    replace upc_descr = strupper(upc_descr)
    
    * Keep relevant variables
    keep upc* brand* tot_q multi size1_a year myear product_module_code
    
    * Create size and sales rankings
    sort year upc_descr size1_a
    bys year upc_descr: gen size_rank = _n 
    gsort year upc_descr -tot_q
    bys year upc_descr: gen sales_rank = _n 
    
    * Save 2006 UPC information
    preserve
    keep if year == 2006
    rename size1_a size_2006
    rename tot_q tot_q_2006
    rename size_rank size_rank_2006
    rename sales_rank sales_rank_2006
    rename upc_count upc_count_2006
    save "${data_dir}/Demand_Results/NL_`pm'_2006_shrink", replace
    restore 
    
    * Keep 2018 data for comparison
    keep if year == 2018
    preserve
    drop if _n != 1
    gen min_diff = .
    list
    save "${data_dir}/Demand_Results/NL_`pm'_2006_2018_shrink", replace
    restore
    
    * Join with 2006 data by UPC description and multi-pack status
    joinby upc_descr multi using "${data_dir}/Demand_Results/NL_`pm'_2006_shrink", _merge(_merge)
    
    * Process matched products
    if _N > 0 {
        di "Processing matched products"
        sort upc_descr size_rank size_rank_2006
        
        * Calculate size differences
        gen diff = size1_a - size_2006
        egen min_diff = min(abs(diff)), by(upc_descr multi size1_a)
        replace min_diff = -min_diff if min_diff == -diff
        gen p_diff = diff/size_2006 
        
        * Calculate size differences by ranking method
        gen tmin_diff_rank = diff if size_rank == size_rank_2006
        gen tp_diff_rank = diff/size_2006 if size_rank == size_rank_2006
        egen min_diff_rank = mean(tmin_diff_rank), by(upc_descr multi size1_a)
        egen p_diff_rank = mean(tp_diff_rank), by(upc_descr multi size1_a)
        
        * Calculate size differences for top-selling UPCs
        gen tmin_diff_sales = diff if sales_rank == 1 & sales_rank_2006 == 1
        gen tp_diff_sales = diff/size_2006 if sales_rank == 1 & sales_rank_2006 == 1
        egen min_diff_sales = mean(tmin_diff_sales), by(upc_descr multi size1_a)
        egen p_diff_sales = mean(tp_diff_sales), by(upc_descr multi size1_a)
        
        * Keep closest matches
        keep if diff == min_diff
        drop upc_fe brand_code _merge myear tmin* diff tp*
        
        save "${data_dir}/Demand_Results/NL_`pm'_2006_2018_shrink", replace
    }
}

* Combine shrinkflation results across modules
clear

foreach pm of numlist `module_list' {
    capture confirm file "${data_dir}/Demand_Results/NL_`pm'_2006_2018_shrink.dta"
    if _rc == 0 {
        append using "${data_dir}/Demand_Results/NL_`pm'_2006_2018_shrink"
    }
}

* Calculate statistics for UPC matching
gen num_upc_2018 = upc_count



* Calculate percentage change statistics
local list "p_diff p_diff_rank p_diff_sales"
foreach y of varlist `list' {

    * Create indicators for different size change categories
    gen t`y'_20 = `y' < 0 & `y' >= -0.2 & `y' != .

    * Count UPCs in each category
    egen sfrac_`y'_20 = sum(t`y'_20) if `y' != ., by(product_module_code)

    * Get module-level counts
    egen count_`y'_20 = mean(sfrac_`y'_20), by(product_module_code)
	gen totfrac_`y'_20 = count_`y'_20/num_upc_2018  
}

* Keep module-level statistics
keep product_module* totfrac*
duplicates drop 
merge 1:m product_module_code using ${data_dir}/Nielsen/movement/products
keep if _merge == 3
keep product_module* totfrac*
duplicates drop

save "${data_dir}/Demand_Results/NL_2006_2018_shrink", replace

* Appendix Table A21 (Shrinkflation)
di "Generating Table A21"
use "${data_dir}/Demand_Results/NL_2006_2018_shrink", clear
gen pd = proper(product_module_d)
replace pd = regexr(pd, "&", "\&")
sort pd 


* Create Table A21
file open tableA21 using "${table_dir}/TableA21shrink.tex", write replace
file write tableA21 " \\\toprule"_n 
file write tableA21 "  Product Market & $\,\,\,\,\,\,\,\,\,\,\,\,$CT$\,\,\,\,\,\,\,\,\,\,\,\,\,\,$ & $\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,$SR$\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,$ & $\,\,\,\,\,\,\,$TS  $\,\,\,\,\,\,\,\,$\tabularnewline\midrule"_n
file write tableA21 "\endfirsthead"_n
file write tableA21 "\toprule"_n
file write tableA21 "  Product Market & $\,\,\,\,\,\,\,\,\,\,\,\,$CT$\,\,\,\,\,\,\,\,\,\,\,\,\,\,$ & $\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,$SR$\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,$ & $\,\,\,\,\,\,\,$TS  $\,\,\,\,\,\,\,\,$\tabularnewline\midrule"_n
file write tableA21 "\endhead "_n
file write tableA21 "\midrule \multicolumn{4}{r}{{Continued on next page}} \\ \bottomrule"_n
file write tableA21 "\endfoot"_n
file write tableA21 "\endlastfoot"_n
file close tableA21

* Write shrinkflation statistics
local N = _N
forvalues i = 1(1)`N' {
	local pd = pd[`i']
	
	local v1 = (round(totfrac_p_diff_20[`i'], 0.001))
	local v2 = (round(totfrac_p_diff_rank_20[`i'], 0.001))
	local v3 = (round(totfrac_p_diff_sales_20[`i'], 0.001))
	
	local fmtv1 : display %4.2f `v1'
	local fmtv2 : display %4.2f `v2'
	local fmtv3 : display %4.2f `v3'
	
	file open tableA21 using "${table_dir}/TableA21shrink.tex", write append
	file write tableA21 "`pd' & `fmtv1' & `fmtv2' & `fmtv3' \\ "_n 
	file close tableA21

}

file open tableA21 using "${table_dir}/TableA21shrink.tex", write append
file write tableA21 "\bottomrule"_n
file close tableA21

/***************************************************************************
* 8.7 PRODUCT TURNOVER AND MARKUP DETERMINANTS
* - Analyzes the determinants of markup changes
* - Examines product entry and exit patterns
***************************************************************************/

* Analyze UPC-level markup changes from 2006 to 2018
clear

* Reset the module list to only include 70 modules with valid demand parameters
local module_list "1030 1044 1050 1105 1114 1120 1177 1257 1290 1293 1320 1323 1326 1327 1330 1331 1334 1340 1341 1344 1360 1362 1425 1441 1452 1455 1461 1463 1484 1487 1493 1508 1532 1553 2621 2623 2624 2629 2631 2634 2672 2675 3550 3561 3572 3574 3576 3577 3578 3583 3590 3592 3603 3605 3618 3625 4000 4001 4002 4003 4004 4005 4006 5000 5010 5050 7012 7260 7340 7734"


global data_dir "/project/sullivan/markups/Data"

*Compute year level markups and potential correlates of markup changes
local year_list "2006 2018" 
foreach pm of numlist `module_list' {
	foreach yr of numlist `year_list' {
		disp(`pm')
		
		use "${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24_clusterSE", clear	
		gen markups = (price_normalized - costs_nl)/(price_normalized)
		gen revenue = price_norm*quantity_norm
		egen num_prod_mkt = count(upc), by(store_code week)
		egen num_prod_mkt_cluster = count(upc), by(store_code week cluster_id)
		egen tot_q = sum(quantity_normalized), by(upc)
		egen num_mkts = count(quantity_norm), by(upc)

		drop _merge
		merge m:1 product_module_code brand_descr using "/project/markups/Data/NewRep/supplemental/brand_lookup.dta"
		drop if _merge == 2
		gen firm_id = parent_2006 if year == 2006
		replace firm_id = parent_2018 if year == 2018
		replace firm_id = - brand_code if _merge == 1
		egen fid = group(firm_id)
			
		egen avgall_share = mean(shares), by(upc)
		egen avgall_inside_share = mean(exp(ln_sjg)), by(upc)
		egen avgall_price = mean(price_norm), by(upc)
		egen avgall_markups = mean(markups)  , by(upc)
		egen avgall_mkt_size = mean(market_size), by(upc)
		egen avgall_mkt_prods = mean(num_prod_mkt), by(upc)

		egen denom1 = sum(1/num_mkts), by(upc)
		egen tavgall_tot_q = sum(tot_q/num_mkts), by(upc)
		egen tavgall_num_mkts = sum(num_mkts/num_mkts), by(upc)

		gen avgall_tot_q = tavgall_tot_q/denom1
		gen avgall_num_mkts = tavgall_num_mkts/denom1

		rename denom1 avgall_numupc

		order product_module* avgall* alpha sigma
		keep product_module* avgall* alpha sigma upc* brand* fid year
		duplicates drop
		save "${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24_stats_mkpreg_upc_1", replace	
	}

}   


clear
* Add data for other modules
foreach pm of numlist `module_list' {
    di "Adding UPC stats for module `pm'"
    foreach yr of numlist `year_list' {
        append using ${data_dir}/Demand_Results/NL_`pm'_`yr'_05_24_stats_mkpreg_upc_1
        *replace year = `yr' if year == .
    }
}

* Calculate firm and UPC counts
egen num_upc = count(upc), by(product_module_code year)
egen num_firm_upc = count(upc), by(product_module_code year fid)

* Prepare data for analysis
drop upc_ver upc_descr* upc_fe fid brand*
reshape wide avgall* num* alpha sigma, i(upc product_module*) j(year)

* Calculate markup change
gen mkp_chg = avgall_markups2018 - avgall_markups2006

* Calculate changes in other variables
foreach x in avgall_share avgall_inside_share avgall_price avgall_markups avgall_mkt_size avgall_mkt_prods avgall_tot_q avgall_num_mkts alpha_nl sigma_nl num_upc num_firm_upc {
    gen chg_`x' = `x'2018 - `x'2006
}

* Create indicators for product entry and exit
gen dropped = 1 if avgall_markups2018 == .
replace dropped = 0 if mkp_chg != .
gen added = 1 if avgall_markups2006 == .
replace added = 0 if mkp_chg != .


*Create Table 10
file open tableA10 using "${table_dir}/Table10.tex", write replace
file write tableA10 " \toprule\\"_n 
file write tableA10 "  & Corr($\Delta$Markup, & Corr($\Delta$Markup, & Corr(Enter by  & Corr(Exit by  \\ & $x_{2006}$)          & $x_{2018}-x_{2006}$)   & 2018, $x_{2018}$)       & 2018, $x_{2006}$) \\ $x$         & (1) & (2) & (3) & (4)\\"_n
file write tableA10 " \midrule"_n
file close tableA10

local i = 0
local j = 1
foreach x in avgall_share avgall_inside_share avgall_price avgall_markups avgall_mkt_size avgall_mkt_prods avgall_tot_q avgall_num_mkts alpha_nl sigma_nl num_upc num_firm_upc {
    local i = `i'+1
	corr mkp_chg `x'2006 if avgall_markups2006 > 0 & avgall_markups2006 < 1 & avgall_markups2018 > 0 & avgall_markups2018 < 1
	local v_`i'_`j' = (round(r(C)[1,2], 0.001))
	local fmtv_`i'_`j' : display %4.3f `v_`i'_`j''
	
	}

	
local i = 0
local j = 2
foreach x in avgall_share avgall_inside_share avgall_price avgall_markups avgall_mkt_size avgall_mkt_prods avgall_tot_q avgall_num_mkts alpha_nl sigma_nl num_upc num_firm_upc {
    local i = `i'+1
	corr mkp_chg chg_`x' if avgall_markups2006 > 0 & avgall_markups2006 < 1 & avgall_markups2018 > 0 & avgall_markups2018 < 1
	local v_`i'_`j' = (round(r(C)[1,2], 0.001))
	local fmtv_`i'_`j' : display %4.3f `v_`i'_`j''
	
	}
	

local i = 0
local j = 3
foreach x in avgall_share avgall_inside_share avgall_price avgall_markups avgall_mkt_size avgall_mkt_prods avgall_tot_q avgall_num_mkts alpha_nl sigma_nl num_upc num_firm_upc {
    local i = `i'+1
	corr added `x'2018 if  avgall_markups2018 > 0 & avgall_markups2018 < 1
	local v_`i'_`j' = (round(r(C)[1,2], 0.001))
	local fmtv_`i'_`j' : display %4.3f `v_`i'_`j''
	
	}



local i = 0
local j = 4
foreach x in avgall_share avgall_inside_share avgall_price avgall_markups avgall_mkt_size avgall_mkt_prods avgall_tot_q avgall_num_mkts alpha_nl sigma_nl num_upc num_firm_upc {
    local i = `i'+1
	corr dropped `x'2006 if avgall_markups2006 > 0 & avgall_markups2006 < 1 
	local v_`i'_`j' = (round(r(C)[1,2], 0.001))
	local fmtv_`i'_`j' : display %4.3f `v_`i'_`j''
	
	}
file open tableA10 using "${table_dir}/Table10.tex", write append
 
local v_1_0 = "Share"
local v_2_0 = "Inside Share"
local v_3_0 = "Price"
local v_4_0 = "Markups"
local v_5_0 = "Market size"
local v_6_0 = "No. UPCs in market"
local v_7_0 = "Total Quantity"
local v_8_0 = "No. markets UPC sold in"
local v_9_0 = "$\alpha$"
local v_10_0 = "$\sigma$"
local v_11_0 = "No. UPCs in module"
local v_12_0 = "No. UPCs sold by firm"
 
forvalues i = 1(1)12 {	
	    file write tableA10 "`v_`i'_0' & `fmtv_`i'_1' & `fmtv_`i'_2' & `fmtv_`i'_3' & `fmtv_`i'_4' \\ "_n 
}
file write tableA10 "\bottomrule\\ "_n 
file close tableA10




* Table 11: Module-Level Correlates of Price Coefficient Changes
di "Analyzing module-level determinants for Table 12"

preserve

collapse alpha*  num_upc*  dropped added, by(product_module*)
gen chg_alpha_nl = alpha_nl2018 - alpha_nl2006
gen chg_num_upc = num_upc2018 - num_upc2006
merge 1:1 product_module_code using "${data_dir}/Demand_Results/NL_2006_2018_shrink"
drop if _merge == 2




* Adjust size change variables for modules without matches
local list "p_diff p_diff_rank p_diff_sales"
foreach y in `list' {
    replace frac_`y'_neg = 0 if num_upc_join == 0
    replace frac_`y'_pos = 0 if num_upc_join == 0
    replace frac_`y'_zero = 0 if num_upc_join == 0
    replace frac_`y'_20 = 0 if num_upc_join == 0
    replace frac_`y'_10 = 0 if num_upc_join == 0
    replace frac_`y'_05 = 0 if num_upc_join == 0

    replace count_`y'_neg = 0 if num_upc_join == 0
    replace count_`y'_pos = 0 if num_upc_join == 0
    replace count_`y'_zero = 0 if num_upc_join == 0
    replace count_`y'_20 = 0 if num_upc_join == 0
    replace count_`y'_10 = 0 if num_upc_join == 0
    replace count_`y'_05 = 0 if num_upc_join == 0
}

* Calculate fraction of UPCs with size reductions >20%
local list "p_diff p_diff_rank p_diff_sales"
foreach y in `list' {
    gen totfrac_`y'_neg = count_`y'_neg/num_upc_2018 
    gen totfrac_`y'_pos = count_`y'_pos/num_upc_2018 
    gen totfrac_`y'_zero = (count_`y'_zero + num_upc_unchg)/num_upc_2018
    gen totfrac_`y'_20 = count_`y'_20/num_upc_2018   
    gen totfrac_`y'_10 = count_`y'_10/num_upc_2018  
    gen totfrac_`y'_05 = count_`y'_05/num_upc_2018  
}


replace totfrac_p_diff_rank_20 = 0 if totfrac_p_diff_20 == .
replace totfrac_p_diff_sales_20 = 0 if totfrac_p_diff_20 == .
replace totfrac_p_diff_20 = 0 if totfrac_p_diff_20 == .



* Write table
file open tableA11 using "${table_dir}/Table11.tex", write replace
file write tableA11 " \toprule\\"_n 
file write tableA11 "  \multicolumn{2}{l}{Panel A: Measures of Product Assortment }&\,\,\,\,\,\,\,\, &  \multicolumn{2}{l}{Panel B: Measures of Shrinkflaton } \\\cmidrule(lr){1-2}  \cmidrule(lr){4-5} Variable Name & Corr($\Delta \alpha$, Variable) & & Variable Name & Corr($\Delta \alpha$, Variable)\\ \midrule "_n
file write tableA11 " \midrule"_n
file close tableA11




local v_1_0 = "Chg. in Number of UPCs"
local v_2_0 = "Frac. UPCs Enter after 2006"
local v_3_0 = "Frac. UPCs Exit by 2018"

local v_1_2 = "Closest to Metric"
local v_2_2 = "Size Rank Metric"
local v_3_2 = "Top Selling Metric"


* Analyze correlation with shrinkflation
corr chg_alpha_nl chg_num_upc dropped added totfrac_p_diff_20 totfrac_p_diff_rank_20 totfrac_p_diff_sales_20 

local v_1_1 = (round(r(C)[1,2], 0.001))
local fmtv_1_1 : display %4.3f `v_1_1'
local v_2_1 = (round(r(C)[1,4], 0.001))
local fmtv_2_1 : display %4.3f `v_2_1'
local v_3_1 = (round(r(C)[1,3], 0.001))
local fmtv_3_1 : display %4.3f `v_3_1'
local v_1_4 = (round(r(C)[1,5], 0.001))
local fmtv_1_4 : display %4.3f `v_1_4'
local v_2_4 = (round(r(C)[1,6], 0.001))
local fmtv_2_4 : display %4.3f `v_2_4'
local v_3_4 = (round(r(C)[1,7], 0.001))
local fmtv_3_4 : display %4.3f `v_3_4'


        
file open tableA11 using "${table_dir}/Table11.tex", write append
forvalues i = 1(1)3 {	
	    file write tableA11 "`v_`i'_0' & `fmtv_`i'_1' && `v_`i'_2'  & `fmtv_`i'_4' \\ "_n 
}
file write tableA11 "\bottomrule\\ "_n 
file close tableA11

